xref: /petsc/config/BuildSystem/retrieval.py (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1from __future__ import absolute_import
2import logger
3
4import os
5from urllib.request import urlretrieve
6from urllib import parse as urlparse_local
7import config.base
8import socket
9import shutil
10
# Fix parsing for nonstandard schemes: add them to urllib.parse's list of netloc-bearing schemes
urlparse_local.uses_netloc += ['bk', 'ssh', 'svn']
13
14class Retriever(logger.Logger):
15  def __init__(self, sourceControl, clArgs = None, argDB = None):
16    logger.Logger.__init__(self, clArgs, argDB)
17    self.sourceControl = sourceControl
18    self.gitsubmodules = []
19    self.gitprereq = 1
20    self.git_urls = []
21    self.hg_urls = []
22    self.dir_urls = []
23    self.link_urls = []
24    self.tarball_urls = []
25    self.stamp = None
26    return
27
28  def isGitURL(self, url):
29    parsed = urlparse_local.urlparse(url)
30    if (parsed[0] == 'git') or (parsed[0] == 'ssh' and parsed[2].endswith('.git')) or (parsed[0] == 'https' and parsed[2].endswith('.git')):
31      return True
32    elif os.path.isdir(url) and self.isDirectoryGitRepo(url):
33      return True
34    return False
35
36  def setupURLs(self,packagename,urls,gitsubmodules,gitprereq):
37    self.packagename = packagename
38    self.gitsubmodules = gitsubmodules
39    self.gitprereq = gitprereq
40    for url in urls:
41      parsed = urlparse_local.urlparse(url)
42      if self.isGitURL(url):
43        self.git_urls.append(self.removePrefix(url,'git://'))
44      elif parsed[0] == 'hg'or (parsed[0] == 'ssh' and parsed[1].startswith('hg@')):
45        self.hg_urls.append(self.removePrefix(url,'hg://'))
46      elif parsed[0] == 'dir' or os.path.isdir(url):
47        self.dir_urls.append(self.removePrefix(url,'dir://'))
48      elif parsed[0] == 'link':
49        self.link_urls.append(self.removePrefix(url,'link://'))
50      else:
51        # check for ftp.mcs.anl.gov - and use https://,www.mcs.anl.gov,ftp://
52        if url.find('ftp.mcs.anl.gov') != -1:
53          https_url = url.replace('http://','https://').replace('ftp://','http://')
54          self.tarball_urls.extend([https_url,https_url.replace('ftp.mcs.anl.gov/pub/petsc/','www.mcs.anl.gov/petsc/mirror/'),https_url.replace('https://','ftp')])
55        else:
56          self.tarball_urls.extend([url])
57
58  def isDirectoryGitRepo(self, directory):
59    if not hasattr(self.sourceControl, 'git'):
60      self.logPrint('git not found in self.sourceControl - cannot evaluate isDirectoryGitRepo(): '+directory)
61      return False
62    from config.base import Configure
63    for loc in ['.git','']:
64      cmd = '%s rev-parse --resolve-git-dir  %s'  % (self.sourceControl.git, os.path.join(directory,loc))
65      (output, error, ret) = Configure.executeShellCommand(cmd, checkCommand = Configure.passCheckCommand, log = self.log)
66      if not ret:
67        return True
68    return False
69
70  @staticmethod
71  def removeTarget(t):
72    if os.path.islink(t) or os.path.isfile(t):
73      os.unlink(t) # same as os.remove(t)
74    elif os.path.isdir(t):
75      shutil.rmtree(t)
76
77  @staticmethod
78  def getDownloadFailureMessage(package, url, filename=None):
79    slashFilename = '/'+filename if filename else ''
80    return '''\
81Unable to download package %s from: %s
82* If URL specified manually - perhaps there is a typo?
83* If your network is disconnected - please reconnect and rerun ./configure
84* Or perhaps you have a firewall blocking the download
85* You can run with --with-packages-download-dir=/adirectory and ./configure will instruct you what packages to download manually
86* or you can download the above URL manually, to /yourselectedlocation%s
87  and use the configure option:
88  --download-%s=/yourselectedlocation%s
89    ''' % (package.upper(), url, slashFilename, package, slashFilename)
90
91  @staticmethod
92  def removePrefix(url,prefix):
93    '''Replacement for str.removeprefix() supported only since Python 3.9'''
94    if url.startswith(prefix):
95      return url[len(prefix):]
96    return url
97
98  def generateURLs(self):
99    if hasattr(self.sourceControl, 'git') and self.gitprereq:
100      for url in self.git_urls:
101        yield('git',url)
102    else:
103      self.logPrint('Git not found or gitprereq check failed! skipping giturls: '+str(self.git_urls)+'\n')
104    if hasattr(self.sourceControl, 'hg'):
105      for url in self.hg_urls:
106        yield('hg',url)
107    else:
108      self.logPrint('Hg not found - skipping hgurls: '+str(self.hg_urls)+'\n')
109    for url in self.dir_urls:
110      yield('dir',url)
111    for url in self.link_urls:
112      yield('link',url)
113    for url in self.tarball_urls:
114      yield('tarball',url)
115
116  def genericRetrieve(self,proto,url,root):
117    '''Fetch package from version control repository or tarfile indicated by URL and extract it into root'''
118    if proto == 'git':
119      return self.gitRetrieve(url,root)
120    elif proto == 'hg':
121      return self.hgRetrieve(url,root)
122    elif proto == 'dir':
123      return self.dirRetrieve(url,root)
124    elif proto == 'link':
125      self.linkRetrieve(url,root)
126    elif proto == 'tarball':
127      self.tarballRetrieve(url,root)
128
129  def dirRetrieve(self, url, root):
130    self.logPrint('Retrieving %s as directory' % url, 3, 'install')
131    if not os.path.isdir(url): raise RuntimeError('URL %s is not a directory' % url)
132
133    t = os.path.join(root,os.path.basename(url))
134    self.removeTarget(t)
135    shutil.copytree(url,t)
136
137  def linkRetrieve(self, url, root):
138    self.logPrint('Retrieving %s as link' % url, 3, 'install')
139    if not os.path.isdir(url): raise RuntimeError('URL %s is not pointing to a directory' % url)
140
141    t = os.path.join(root,os.path.basename(url))
142    self.removeTarget(t)
143    os.symlink(os.path.abspath(url),t)
144
145  def gitRetrieve(self, url, root):
146    self.logPrint('Retrieving %s as git repo' % url, 3, 'install')
147    if not hasattr(self.sourceControl, 'git'):
148      raise RuntimeError('self.sourceControl.git not set')
149    if os.path.isdir(url) and not self.isDirectoryGitRepo(url):
150      raise RuntimeError('URL %s is a directory but not a git repository' % url)
151
152    newgitrepo = os.path.join(root,'git.'+self.packagename)
153    self.removeTarget(newgitrepo)
154
155    try:
156      submodopt =''
157      for itm in self.gitsubmodules:
158        submodopt += ' --recurse-submodules='+itm
159      config.base.Configure.executeShellCommand('%s clone %s %s %s' % (self.sourceControl.git, submodopt, url, newgitrepo), log = self.log, timeout = 120.0)
160    except  RuntimeError as e:
161      self.logPrint('ERROR: '+str(e))
162      err = str(e)
163      failureMessage = self.getDownloadFailureMessage(self.packagename, url)
164      raise RuntimeError('Unable to clone '+self.packagename+'\n'+err+failureMessage)
165
166  def hgRetrieve(self, url, root):
167    self.logPrint('Retrieving %s as hg repo' % url, 3, 'install')
168    if not hasattr(self.sourceControl, 'hg'):
169      raise RuntimeError('self.sourceControl.hg not set')
170
171    newgitrepo = os.path.join(root,'hg.'+self.packagename)
172    self.removeTarget(newgitrepo)
173    try:
174      config.base.Configure.executeShellCommand('%s clone %s %s' % (self.sourceControl.hg, url, newgitrepo), log = self.log, timeout = 120.0)
175    except  RuntimeError as e:
176      self.logPrint('ERROR: '+str(e))
177      err = str(e)
178      failureMessage = self.getDownloadFailureMessage(self.packagename, url)
179      raise RuntimeError('Unable to clone '+self.packagename+'\n'+err+failureMessage)
180
181  def tarballRetrieve(self, url, root):
182    parsed = urlparse_local.urlparse(url)
183    filename = os.path.basename(parsed[2])
184    localFile = os.path.join(root,'_d_'+filename)
185    self.logPrint('Retrieving %s as tarball to %s' % (url,localFile) , 3, 'install')
186    ext =  os.path.splitext(localFile)[1]
187    if ext not in ['.bz2','.tbz','.gz','.tgz','.zip','.ZIP']:
188      raise RuntimeError('Unknown compression type in URL: '+ url)
189
190    self.removeTarget(localFile)
191
192    if parsed[0] == 'file' and not parsed[1]:
193      url = parsed[2]
194    if os.path.exists(url):
195      if not os.path.isfile(url):
196        raise RuntimeError('Local path exists but is not a regular file: '+ url)
197      # copy local file
198      shutil.copyfile(url, localFile)
199    else:
200      # fetch remote file
201      try:
202        sav_timeout = socket.getdefaulttimeout()
203        socket.setdefaulttimeout(30)
204        urlretrieve(url, localFile)
205        socket.setdefaulttimeout(sav_timeout)
206      except Exception as e:
207        socket.setdefaulttimeout(sav_timeout)
208        failureMessage = self.getDownloadFailureMessage(self.packagename, url, filename)
209        raise RuntimeError(failureMessage)
210
211    self.logPrint('Extracting '+localFile)
212    if ext in ['.zip','.ZIP']:
213      config.base.Configure.executeShellCommand('cd '+root+'; unzip '+localFile, log = self.log)
214      output = config.base.Configure.executeShellCommand('cd '+root+'; zipinfo -1 '+localFile+' | head -n 1', log = self.log)
215      dirname = os.path.normpath(output[0].strip())
216    else:
217      failureMessage = '''\
218Downloaded package %s from: %s is not a tarball.
219[or installed python cannot process compressed files]
220* If you are behind a firewall - please fix your proxy and rerun ./configure
221  For example at LANL you may need to set the environmental variable http_proxy (or HTTP_PROXY?) to  http://proxyout.lanl.gov
222* You can run with --with-packages-download-dir=/adirectory and ./configure will instruct you what packages to download manually
223* or you can download the above URL manually, to /yourselectedlocation/%s
224  and use the configure option:
225  --download-%s=/yourselectedlocation/%s
226''' % (self.packagename.upper(), url, filename, self.packagename, filename)
227      import tarfile
228      try:
229        tf  = tarfile.open(os.path.join(root, localFile))
230      except tarfile.ReadError as e:
231        raise RuntimeError(str(e)+'\n'+failureMessage)
232      if not tf: raise RuntimeError(failureMessage)
233      #git puts 'pax_global_header' as the first entry and some tar utils process this as a file
234      firstname = tf.getnames()[0]
235      if firstname == 'pax_global_header':
236        firstmember = tf.getmembers()[1]
237      else:
238        firstmember = tf.getmembers()[0]
239      # some tarfiles list packagename/ but some list packagename/filename in the first entry
240      if firstmember.isdir():
241        dirname = firstmember.name
242      else:
243        dirname = os.path.dirname(firstmember.name)
244      tf.extractall(root)
245      tf.close()
246
247    # fix file permissions for the untared tarballs.
248    try:
249      # check if 'dirname' is set'
250      if dirname:
251        config.base.Configure.executeShellCommand('cd '+root+'; chmod -R a+r '+dirname+';find  '+dirname + ' -type d -name "*" -exec chmod a+rx {} \;', log = self.log)
252      else:
253        self.logPrintBox('WARNING: Could not determine dirname extracted by '+localFile+' to fix file permissions')
254    except RuntimeError as e:
255      raise RuntimeError('Error changing permissions for '+dirname+' obtained from '+localFile+ ' : '+str(e))
256    os.unlink(localFile)
257