diff --git a/swh/mirror/forge/sync.py b/swh/mirror/forge/sync.py index 442a6c1..27e7819 100755 --- a/swh/mirror/forge/sync.py +++ b/swh/mirror/forge/sync.py @@ -1,387 +1,387 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import json import sys import requests from os.path import basename from swh.core.config import SWHConfig from .request import RepositorySearch, PassphraseSearch from .request import DiffusionUriEdit, RepositoriesToMirror def mirror_exists(data): """Check the existence of the mirror. Args: data: full information on the repository Returns True if mirror already exists. False otherwise. """ uris = data['attachments']['uris']['uris'] for uri in uris: effective_url = uri['fields']['uri']['effective'] if 'github' in effective_url: return True return False def retrieve_repo_information(data): """Given information on repository, extract the needed information for mirroring. Args: data: full information on the repository Returns: dict with keys phid, description, url, name. """ uris = [u for u in data['attachments']['uris']['uris'] if not u['fields']['disabled']] elected_url = None # Will try to retrieve the most relevant uri (https first) for uri in uris: if uri['fields']['builtin']['protocol'] != 'https': continue effective_url = uri['fields']['uri']['effective'] if effective_url.endswith('.git'): elected_url = effective_url break # then fallback to any other if no https were found if not elected_url: for uri in uris: effective_url = uri['fields']['uri']['effective'] if effective_url.endswith('.git'): elected_url = effective_url break return { 'phid': data['phid'], 'description': data['fields']['name'], 'url': elected_url, 'name': basename(elected_url).split('.')[0], } class SWHMirrorForge(SWHConfig): """Class in charge of mirroring a forge to github. """ CONFIG_BASE_FILENAME = 'mirror-forge/config' DEFAULT_CONFIG = { 'forge_url': ('str', 'https://forge.softwareheritage.org'), 'tokens': ('dict', {'github': None, 'forge': None}) } def __init__(self): super().__init__() self.config = self.parse_config_file() self.forge_url = self.config['forge_url'] self.token_github = self.config['tokens']['github'] self.token_forge = self.config['tokens']['forge'] self._check() def _check(self): """Prepare the needed token from the disk. Returns: tuple (token-forge, token-github) """ if not self.token_forge: raise ValueError("""Install the phabricator forge's token in $SWH_CONFIG_PATH/mirror-forge/config.yml (https://forge.softwareheritage.org/settings/user//page/apitokens/). Once the installation is done, you can trigger this script again.""") if not self.token_github: raise ValueError("""Install one personal github token in $SWH_CONFIG_PATH/mirror-forge/config.yml with scope public_repo (https://github.com/settings/tokens). You must be associated to https://github.com/softwareheritage organization. Once the installation is done, you can trigger this script again.""") def mirror_repo_to_github(self, repo_id, credential_key_id, bypass_check, dry_run=False): """Instantiate a mirror from a repository forge to github if it does not already exist. Args: repo_id: repository's identifier (callsign, phid or id). This will be used to fetch information on the repository to mirror. credential_key_id: the key the forge will use to push to modifications to github dry_run: if True, inhibit the mirror creation (no write is done to either github) or the forge. Otherwise, the default, it creates the mirror to github. Also, a check is done to stop if a mirror uri is already referenced in the forge about github. Returns: the repository instance whose mirror has been successfully mirrored. None if the mirror already exists. Raises: ValueError if some error occurred during any creation/reading step. The detail of the error is in the message. """ token_forge = self.token_forge token_github = self.token_github forge_api_url = self.forge_url # Retrieve repository information if isinstance(repo_id, int): constraint_key = "ids" elif repo_id.startswith("PHID"): constraint_key = "phids" else: constraint_key = "callsigns" data = RepositorySearch(forge_api_url, token_forge).post({ 'constraints[%s][0]' % constraint_key: repo_id, 'attachments[uris]': True }) repository_information = data[0] # Check existence of mirror already set if mirror_exists(repository_information) and not bypass_check: if not bypass_check: return None print('** Bypassing check as requested **') # Retrieve exhaustive information on repository repo = retrieve_repo_information(repository_information) if not repo: raise ValueError('Error when trying to retrieve detailed' ' information on the repository') # Create repository in github if not dry_run: r = requests.post( 'https://api.github.com/orgs/SoftwareHeritage/repos', headers={'Authorization': 'token %s' % token_github}, data=json.dumps({ "name": repo['name'], - "description": repo['description'], + "description": 'GitHub mirror of ' + repo['description'], "homepage": repo['url'], "private": False, "has_issues": False, "has_wiki": False, "has_downloads": True })) if not r.ok: raise ValueError("""Failure to create the repository in github. Status: %s""" % r.status_code) # Retrieve credential information data = PassphraseSearch(forge_api_url, token_forge).post({ 'ids[0]': credential_key_id }) # Retrieve the phid for that passphrase key_phid = list(data.values())[0]['phid'] repo['url_github'] = 'git@github.com:SoftwareHeritage/%s.git' % ( repo['name']) # Install the github mirror in the forge if not dry_run: DiffusionUriEdit(forge_api_url, token_forge).post({ 'transactions[0][type]': 'repository', 'transactions[0][value]': repo['phid'], 'transactions[1][type]': 'uri', 'transactions[1][value]': repo['url_github'], 'transactions[2][type]': 'io', 'transactions[2][value]': 'mirror', 'transactions[3][type]': 'display', 'transactions[3][value]': 'never', 'transactions[4][type]': 'disable', 'transactions[4][value]': False, 'transactions[5][type]': 'credential', 'transactions[4][value]': key_phid, }) return repo def mirror_repos_to_github(self, query_name, credential_key_id, bypass_check, dry_run): """Mirror repositories to github. Args: credential_key_id: the key the forge will use to push to modifications to github query_name: Query's name as per your phabricator forge's setup. dry_run: if True, inhibit the mirror creation (no write is done to either github) or the forge. Otherwise, the default, it creates the mirror to github. Also, a check is done to stop if a mirror uri is already referenced in the forge about github. Returns: dict with keys 'mirrored', 'skipped' and 'errors' keys. """ token_forge = self.token_forge forge_api_url = self.forge_url repositories = list( RepositoriesToMirror(forge_api_url, token_forge).post({ 'queryKey': query_name})) if not repositories: return None for repo in repositories: repo_id = repo['id'] try: if dry_run: print("** DRY RUN - name '%s' ; id '%s' **" % ( repo['name'], repo_id)) repo_detail = self.mirror_repo_to_github( repo_id, credential_key_id, bypass_check, dry_run) if repo_detail: yield "Repository %s mirrored at %s." % ( repo_detail['url'], repo_detail['url_github']) else: yield 'Mirror already configured for %s, stopping.' % ( repo_id) except Exception as e: yield str(e) @click.group() def cli(): pass @cli.command() @click.option('--repo-id', help="Repository's identifier (either callsign, id or phid)") @click.option('--credential-key-id', help="""credential to use for access from phabricator's forge to github""") @click.option('--bypass-check/--no-bypass-check', help="""By default, the process of mirroring stops if a github mirror already exists. This flag bypasses the check. """) @click.option('--dry-run/--no-dry-run', default=False) def mirror(repo_id, credential_key_id, bypass_check, dry_run): """Shell interface to instantiate a mirror from a repository forge to github. Does nothing if the repository already exists. Args: repo_id: repository's identifier callsign. This will be used to fetch information on the repository to mirror. credential_key_id: the key the forge will use to push to modifications to github dry_run: if True, inhibit the mirror creation (no write is done to either github) or the forge. Otherwise, the default, it creates the mirror to github. Also, a check is done to stop if a mirror uri is already referenced in the forge about github. """ mirror_forge = SWHMirrorForge() msg = '' try: if dry_run: print('** DRY RUN **') repo = mirror_forge.mirror_repo_to_github( repo_id, credential_key_id, bypass_check, dry_run) if repo: msg = "Repository %s mirrored at %s." % ( repo['url'], repo['url_github']) else: msg = 'Mirror already configured for %s, stopping.' % repo_id except Exception as e: print(e) sys.exit(1) else: print(msg) sys.exit(0) @cli.command() @click.option('--query-repositories', help="""Name of the query that lists the repositories to mirror in github.""") @click.option('--credential-key-id', help="""credential to use for access from phabricator's forge to github""") @click.option('--bypass-check/--no-bypass-check', default=False, help="""By default, the process of mirroring stops if a github mirror already exists. This flag bypasses the check. """) @click.option('--dry-run/--no-dry-run', default=False, help="""Do nothing but read and print what would actually happen without the flag.""") def mirrors(query_repositories, credential_key_id, bypass_check, dry_run): """Shell interface to instantiate mirrors from a repository forge to github. This uses the query_name provided to execute said query. The resulting repositories is then mirrored to github if not already mirrored. Args: credential_key_id: the key the forge will use to push to modifications to github query_repositories: Query's name which lists the repositories to mirror (as per phabricator forge's setup). dry_run: if True, inhibit the mirror creation (no write is done to either github) or the forge. Otherwise, the default, it creates the mirror to github. Also, a check is done to stop if a mirror uri is already referenced in the forge about github. """ mirror_forge = SWHMirrorForge() if dry_run: print('** DRY RUN **') for msg in mirror_forge.mirror_repos_to_github( query_name=query_repositories, credential_key_id=credential_key_id, bypass_check=bypass_check, dry_run=dry_run): print(msg) if __name__ == '__main__': cli()