mirror of
				https://github.com/optim-enterprises-bv/openwrt-ipq.git
				synced 2025-11-03 19:58:34 +00:00 
			
		
		
		
	Currently the git protocol downloads all submodules of the target repository. This can be unwieldy for repositories with a lot of submodules where only a subset are required in the context of the OpenWrt build. This change adds a PKG_SOURCE_SUBMODULES variable to configure this behavior. It takes a space-separated list of submodule paths, or the word "skip" to disable submodule downloads entirely. The default is to download all submodules, i.e. preserving current behavior. Signed-off-by: Karsten Sperling <ksperling@apple.com>
		
			
				
	
	
		
			433 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			433 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env python3
 | 
						|
#
 | 
						|
# Copyright (c) 2018 Yousong Zhou <yszhou4tech@gmail.com>
 | 
						|
#
 | 
						|
# This is free software, licensed under the GNU General Public License v2.
 | 
						|
# See /LICENSE for more information.
 | 
						|
 | 
						|
import argparse
 | 
						|
import calendar
 | 
						|
import datetime
 | 
						|
import errno
 | 
						|
import fcntl
 | 
						|
import hashlib
 | 
						|
import json
 | 
						|
import os
 | 
						|
import os.path
 | 
						|
import re
 | 
						|
import shutil
 | 
						|
import ssl
 | 
						|
import subprocess
 | 
						|
import sys
 | 
						|
import time
 | 
						|
import urllib.request
 | 
						|
 | 
						|
# Scratch directory: honor the build system's TMP_DIR if set, else /tmp.
TMPDIR = os.environ.get('TMP_DIR') or '/tmp'
# Subdirectory used for in-progress downloads and the commit-timestamp cache.
TMPDIR_DL = os.path.join(TMPDIR, 'dl')
 | 
						|
 | 
						|
 | 
						|
# Raised by Path helpers for filesystem/archive problems (unexpected untar
# layout, unknown compression suffix).
class PathException(Exception): pass

# Raised for download/verification failures; the message is prefixed with the
# source tarball name (see DownloadGitHubTarball._error).
class DownloadGitHubError(Exception): pass
 | 
						|
 | 
						|
 | 
						|
class Path(object):
    """Context manager for preparing and cleaning up directories.

    If ``preclean`` is ``True``, ``path`` will be removed on context enter.

    If ``isdir`` is ``True``, ``path`` will be created on context enter.

    If ``keep`` is ``True``, ``path`` will NOT be removed on context exit.
    """

    def __init__(self, path, isdir=True, preclean=False, keep=False):
        self.path = path
        self.isdir = isdir
        self.preclean = preclean
        self.keep = keep

    def __enter__(self):
        if self.preclean:
            self.rm_all(self.path)
        if self.isdir:
            self.mkdir_all(self.path)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if not self.keep:
            self.rm_all(self.path)

    @staticmethod
    def mkdir_all(path):
        """Same as mkdir -p.

        Fixed: the previous implementation used ``os.path.split``, which
        splits a path into exactly two components, so it could only create
        the final two path levels and failed for deeper missing ancestors.
        ``os.makedirs`` creates every missing intermediate directory and
        ``exist_ok`` keeps the call idempotent.
        """
        os.makedirs(path, exist_ok=True)

    @staticmethod
    def _rmdir_dir(dir_):
        # Recursively empty the directory, then remove it.  Symlinks inside
        # are unlinked by rm_all(), never followed.
        for name in Path._listdir(dir_):
            Path.rm_all(os.path.join(dir_, name))
        Path._rmdir(dir_)

    @staticmethod
    def _mkdir(path):
        # mkdir that tolerates an already-existing directory.
        Path._os_func(os.mkdir, path, errno.EEXIST)

    @staticmethod
    def _rmdir(path):
        # rmdir that tolerates a missing directory.
        Path._os_func(os.rmdir, path, errno.ENOENT)

    @staticmethod
    def _remove(path):
        # unlink that tolerates a missing file.
        Path._os_func(os.remove, path, errno.ENOENT)

    @staticmethod
    def _listdir(path):
        # listdir that returns [] for a missing directory.
        return Path._os_func(os.listdir, path, errno.ENOENT, default=[])

    @staticmethod
    def _os_func(func, path, expected_errno, default=None):
        """Call func(path) in an idempotent way.

        On exception ``ex``, if the type is OSError and
        ``ex.errno == expected_errno``, return ``default``; otherwise
        re-raise.  (The parameter was renamed from ``errno`` so it no
        longer shadows the ``errno`` module.)
        """
        try:
            return func(path)
        except OSError as e:
            if e.errno == expected_errno:
                return default
            raise

    @staticmethod
    def rm_all(path):
        """Same as rm -r."""
        if os.path.islink(path):
            # Remove the symlink itself; do not descend into its target.
            Path._remove(path)
        elif os.path.isdir(path):
            Path._rmdir_dir(path)
        else:
            Path._remove(path)

    @staticmethod
    def untar(path, into=None):
        """Extract gzip tarball at ``path`` into directory ``into``.

        Return the name of the single top-level subdir; raise PathException
        if the archive does not contain exactly one.  ``into`` must be an
        existing directory (despite the default, ``None`` is not usable --
        it would be passed straight to tar as an argument).
        """
        args = ('tar', '-C', into, '-xzf', path, '--no-same-permissions')
        # umask 022 so extracted files are not group/world-writable.
        subprocess.check_call(args, preexec_fn=lambda: os.umask(0o22))
        dirs = os.listdir(into)
        if len(dirs) == 1:
            return dirs[0]
        raise PathException('untar %s: expecting a single subdir, got %s' % (path, dirs))

    @staticmethod
    def tar(path, subdir, into=None, ts=None):
        """Pack ``subdir`` of ``path`` into tarball ``into``.

        ``ts`` is a Unix timestamp applied as a fixed mtime; together with
        the numeric-owner/sort/mode flags it makes the output reproducible.
        Compression is selected from the ``into`` suffix (.xz/.bz2/.gz);
        any other suffix raises PathException.
        """
        # --sort=name requires a recent build of GNU tar
        args = ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name', '--mode=a-s']
        args += ['-C', path, '-cf', into, subdir]
        envs = os.environ.copy()
        if ts is not None:
            args.append('--mtime=@%d' % ts)
        if into.endswith('.xz'):
            envs['XZ_OPT'] = '-7e'
            args.append('-J')
        elif into.endswith('.bz2'):
            args.append('-j')
        elif into.endswith('.gz'):
            args.append('-z')
            # GZIP=-n keeps name/timestamp out of the gzip header so the
            # output is byte-for-byte reproducible.
            envs['GZIP'] = '-n'
        else:
            raise PathException('unknown compression type %s' % into)
        subprocess.check_call(args, env=envs)
 | 
						|
 | 
						|
 | 
						|
class GitHubCommitTsCache(object):
    """File-backed cache mapping API URLs to commit timestamps.

    Entries live in a single text file under TMPDIR_DL, one
    ``key timestamp updated`` triple per line.  Reads take a shared fcntl
    lock and writes an exclusive one, so concurrent download processes can
    share the file safely.
    """

    __cachef = 'github.commit.ts.cache'
    __cachen = 2048

    def __init__(self):
        Path.mkdir_all(TMPDIR_DL)
        self.cachef = os.path.join(TMPDIR_DL, self.__cachef)
        self.cache = {}

    def get(self, k):
        """Return the cached timestamp for key ``k``, or None."""
        fileno = os.open(self.cachef, os.O_RDONLY | os.O_CREAT)
        with os.fdopen(fileno) as fh:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_SH)
                self._cache_init(fh)
                entry = self.cache.get(k)
                if entry is not None:
                    return entry[0]
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)
        return None

    def set(self, k, v):
        """Store timestamp ``v`` under key ``k`` and persist the cache."""
        fileno = os.open(self.cachef, os.O_RDWR | os.O_CREAT)
        with os.fdopen(fileno, 'w+') as fh:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_EX)
                self._cache_init(fh)
                self.cache[k] = (v, int(time.time()))
                self._cache_flush(fh)
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)

    def _cache_init(self, fh):
        # Merge on-disk entries into self.cache (one triple per line).
        for line in fh:
            key, ts, updated = line.split()
            self.cache[key] = (int(ts), int(updated))

    def _cache_flush(self, fh):
        # Rewrite the file from self.cache, truncating it first.
        # NOTE(review): the sort is ascending by update time and the slice
        # keeps the first __cachen entries, i.e. the least-recently-updated
        # ones survive truncation; preserved as-is from the original.
        entries = sorted(self.cache.items(), key=lambda item: item[1][1])
        entries = entries[:self.__cachen]
        self.cache = {}
        os.ftruncate(fh.fileno(), 0)
        fh.seek(0, os.SEEK_SET)
        fh.writelines(
            '{0} {1} {2}\n'.format(key, ts, updated)
            for key, (ts, updated) in entries
        )
 | 
						|
 | 
						|
 | 
						|
class DownloadGitHubTarball(object):
    """Download and repack archive tarball from GitHub.

    Compared with the method of packing after cloning the whole repo, this
    method is more friendly to users with fragile internet connection.

    However, there are limitations with this method

     - GitHub imposes a 60 reqs/hour limit for unauthenticated API access.
       This affects fetching commit date for reproducible tarballs.  Download
       through the archive link is not affected.

     - GitHub archives do not contain source codes for submodules.

     - GitHub archives seem to respect .gitattributes and ignore paths with
       export-ignore attributes.

    For the first two issues, the method will fail loudly to allow fallback to
    clone-then-pack method.

    As for the 3rd issue, to make sure that this method only produces identical
    tarballs as the fallback method, we require the expected hash value to be
    supplied.  That means the first tarball will need to be prepared by the
    clone-then-pack method
    """

    # Accepts both https:// and git:// GitHub URLs; captures owner/repo for
    # building API paths.
    __repo_url_regex = re.compile(r'^(?:https|git)://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')

    def __init__(self, args):
        # ``args`` is the argparse.Namespace built in main(); attribute names
        # correspond one-to-one to the command-line options.
        self.dl_dir = args.dl_dir
        self.version = args.version
        self.subdir = args.subdir
        self.source = args.source
        # None, a list of submodule paths, or ['skip'] (see --submodules).
        self.submodules = args.submodules
        self.url = args.url
        self._init_owner_repo()
        self.xhash = args.hash
        self._init_hasher()
        self.commit_ts = None           # lazy load commit timestamp
        self.commit_ts_cache = GitHubCommitTsCache()
        self.name = 'github-tarball'

    def download(self):
        """Download and repack GitHub archive tarball.

        Raises DownloadGitHubError when submodules are required (GitHub
        archives never contain submodule sources) or when the repacked
        tarball's hash does not match the expected one.
        """
        # Any submodule request other than the explicit 'skip' marker cannot
        # be satisfied from a GitHub archive; fail loudly so the caller can
        # fall back to the clone-then-pack method.
        if self.submodules and self.submodules != ['skip']:
            raise self._error('Fetching submodules is not yet supported')
        self._init_commit_ts()
        with Path(TMPDIR_DL, keep=True) as dir_dl:
            # fetch tarball from GitHub
            tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
            with Path(tarball_path, isdir=False):
                self._fetch(tarball_path)
                # unpack
                d = os.path.join(dir_dl.path, self.subdir + '.untar')
                with Path(d, preclean=True) as dir_untar:
                    tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
                    dir0 = os.path.join(dir_untar.path, tarball_prefix)
                    dir1 = os.path.join(dir_untar.path, self.subdir)
                    # submodules check
                    # If submodules were not explicitly skipped and the
                    # archive declares any (.gitmodules), refuse: their
                    # sources are missing from the archive.
                    if self.submodules != ['skip'] and self._has_submodule(dir0):
                        raise self._error('Fetching submodules is not yet supported')
                    # rename subdir
                    os.rename(dir0, dir1)
                    # repack
                    into=os.path.join(TMPDIR_DL, self.source)
                    Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
                    try:
                        self._hash_check(into)
                    except Exception:
                        # Remove the bad tarball so a later retry starts clean.
                        Path.rm_all(into)
                        raise
                    # move to target location
                    file1 = os.path.join(self.dl_dir, self.source)
                    if into != file1:
                        shutil.move(into, file1)

    def _has_submodule(self, dir_):
        # True if the unpacked tree contains a non-empty .gitmodules file.
        # Any stat error other than ENOENT also counts as "has submodules" --
        # conservative: better to fall back than to silently miss one.
        m = os.path.join(dir_, '.gitmodules')
        try:
            st = os.stat(m)
            return st.st_size > 0
        except OSError as e:
            return e.errno != errno.ENOENT

    def _init_owner_repo(self):
        """Parse self.owner/self.repo out of the URL; strip a .git suffix."""
        m = self.__repo_url_regex.search(self.url)
        if m is None:
            raise self._error('Invalid github url: {}'.format(self.url))
        owner = m.group('owner')
        repo = m.group('repo')
        if repo.endswith('.git'):
            repo = repo[:-4]
        self.owner = owner
        self.repo = repo

    def _init_hasher(self):
        # Pick the digest from the expected hash length: 64 hex chars means
        # sha256, 32 means md5; anything else is rejected.
        xhash = self.xhash
        if len(xhash) == 64:
            self.hasher = hashlib.sha256()
        elif len(xhash) == 32:
            self.hasher = hashlib.md5()
        else:
            raise self._error('Requires sha256sum for verification')
        self.xhash = xhash

    def _hash_check(self, f):
        """Hash file ``f`` in 4 KiB chunks and compare to the expected value."""
        with open(f, 'rb') as fin:
            while True:
                d = fin.read(4096)
                if not d:
                    break
                self.hasher.update(d)
        xhash = self.hasher.hexdigest()
        if xhash != self.xhash:
            raise self._error('Wrong hash (probably caused by .gitattributes), expecting {}, got {}'.format(self.xhash, xhash))

    def _init_commit_ts(self):
        """Resolve the commit timestamp (for reproducible tarball mtimes).

        Consults the on-disk cache first, then the GitHub APIs below; the
        first successful source wins.  Raises DownloadGitHubError with all
        accumulated failure reasons if every attempt fails.
        """
        if self.commit_ts is not None:
            return
        # GitHub provides 2 APIs[1,2] for fetching commit data.  API[1] is more
        # terse while API[2] provides more verbose info such as commit diff
        # etc.  That's the main reason why API[1] is preferred: the response
        # size is predictable.
        #
        # However, API[1] only accepts complete commit sha1sum as the parameter
        # while API[2] is more liberal accepting also partial commit id and
        # tags, etc.
        #
        # [1] Get a single commit, Repositories, https://developer.github.com/v3/repos/commits/#get-a-single-commit
        # [2] Git Commits, Git Data, https://developer.github.com/v3/git/commits/#get-a-commit
        apis = [
            {
                'url': self._make_repo_url_path('git', 'commits', self.version),
                'attr_path': ('committer', 'date'),
            }, {
                'url': self._make_repo_url_path('commits', self.version),
                'attr_path': ('commit', 'committer', 'date'),
            },
        ]
        version_is_sha1sum = len(self.version) == 40
        if not version_is_sha1sum:
            # Non-sha1 refs (tags, short ids) only work with API[2]; try it
            # first by rotating the list.
            apis.insert(0, apis.pop())
        reasons = ''
        for api in apis:
            url = api['url']
            attr_path = api['attr_path']
            try:
                ct = self.commit_ts_cache.get(url)
                if ct is not None:
                    self.commit_ts = ct
                    return
                ct = self._init_commit_ts_remote_get(url, attr_path)
                self.commit_ts = ct
                self.commit_ts_cache.set(url, ct)
                return
            except Exception as e:
                # Collect the reason and fall through to the next API.
                reasons += '\n' + ("  {}: {}".format(url, e))
        raise self._error('Cannot fetch commit ts:{}'.format(reasons))

    def _init_commit_ts_remote_get(self, url, attrpath):
        """Fetch ``url`` and drill into the JSON along ``attrpath``.

        Returns the committer date as a Unix timestamp (the ISO-8601 'Z'
        suffix marks UTC, hence calendar.timegm).
        """
        resp = self._make_request(url)
        data = resp.read()
        date = json.loads(data)
        for attr in attrpath:
            date = date[attr]
        date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
        date = date.timetuple()
        ct = calendar.timegm(date)
        return ct

    def _fetch(self, path):
        """Fetch tarball of the specified version ref."""
        ref = self.version
        url = self._make_repo_url_path('tarball', ref)
        resp = self._make_request(url)
        # Stream the response to disk in 4 KiB chunks.
        with open(path, 'wb') as fout:
            while True:
                d = resp.read(4096)
                if not d:
                    break
                fout.write(d)

    def _make_repo_url_path(self, *args):
        """Return the API path /repos/{owner}/{repo}[/args...]."""
        url = '/repos/{0}/{1}'.format(self.owner, self.repo)
        if args:
            url += '/' + '/'.join(args)
        return url

    def _make_request(self, path):
        """Request GitHub API endpoint on ``path``."""
        url = 'https://api.github.com' + path
        headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'OpenWrt',
        }
        req = urllib.request.Request(url, headers=headers)
        # NOTE(review): this disables TLS certificate verification and uses a
        # private ssl API; presumably chosen so downloads work on hosts
        # without CA bundles -- consider ssl.create_default_context().
        sslcontext = ssl._create_unverified_context()
        fileobj = urllib.request.urlopen(req, context=sslcontext)
        return fileobj

    def _error(self, msg):
        # Wrap ``msg`` in a DownloadGitHubError prefixed with the tarball name.
        return DownloadGitHubError('{}: {}'.format(self.source, msg))
 | 
						|
 | 
						|
 | 
						|
def main():
    """Entry point: parse the command-line options and run the download.

    On any failure, report the source/URL on stderr and exit with status 1
    so the caller (OpenWrt's download machinery) can fall back to the
    clone-then-pack method.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dl-dir', default=os.getcwd(), help='Download dir')
    for opt, desc in (
            ('--url', 'Download URL'),
            ('--subdir', 'Source code subdir name'),
            ('--version', 'Source code version'),
            ('--source', 'Source tarball filename'),
            ('--hash', "Source tarball's expected sha256sum"),
    ):
        parser.add_argument(opt, help=desc)
    parser.add_argument('--submodules', nargs='*', help='List of submodules, or "skip"')
    args = parser.parse_args()
    try:
        DownloadGitHubTarball(args).download()
    except Exception as ex:
        sys.stderr.write('{}: Download from {} failed\n'.format(args.source, args.url))
        sys.stderr.write('{}\n'.format(ex))
        sys.exit(1)
 | 
						|
 | 
						|
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
 |