Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7343056
D267.id887.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Subscribers
None
D267.id887.diff
View Options
diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -24,6 +24,8 @@
Package: python3-swh.lister
Architecture: all
Depends: python3-swh.scheduler (>= 0.0.14~),
+ python3-subvertpy (>= 0.9.4~),
+ python3-suds,
${misc:Depends},
${python3:Depends}
Breaks: python3-swh.lister.github
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,5 @@
requests
setuptools
xmltodict
+suds_jurko
+subvertpy >= 0.9.4
diff --git a/swh/lister/fusionforge/__init__.py b/swh/lister/fusionforge/__init__.py
new file mode 100644
diff --git a/swh/lister/fusionforge/lister.py b/swh/lister/fusionforge/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fusionforge/lister.py
@@ -0,0 +1,269 @@
+# Copyright (C) 2017 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+import requests
+
+from suds.client import Client as SoapClient
+from subvertpy.ra import RemoteAccess
+
+from swh.lister.core.lister_base import SWHListerBase
+from swh.lister.core.lister_transports import SWHListerHttpTransport
+
+from swh.lister.fusionforge.models import FusionForgeModel
+
+
+def _check_repo_url(repo_url, timeout=30):
+    """
+    Check if a repository http(s) url is valid.
+
+    A GET request is sent to the url. If the request itself fails and
+    the url uses https, the same url is tried again over plain http.
+
+    Args:
+        repo_url (str): url to probe
+        timeout (float): seconds to wait for the server before
+            considering the request as failed
+
+    Returns:
+        The url that answered with status code 200 or 403 (possibly the
+        http variant of ``repo_url``), None otherwise.
+    """
+    https_scheme = 'https://'
+    try:
+        response = requests.get(repo_url, timeout=timeout)
+    except requests.exceptions.RequestException:
+        # some repos may not be under https, try http then; only the
+        # scheme is rewritten so that a 'https' substring located
+        # elsewhere in the url is left untouched
+        if repo_url.startswith(https_scheme):
+            return _check_repo_url(
+                'http://' + repo_url[len(https_scheme):], timeout)
+        return None
+    # 403 means forbidden to browse but not to git clone or
+    # svn checkout, so consider the url valid in that case too
+    if response.status_code in (200, 403):
+        return repo_url
+    return None
+
+
+def _try_to_get_origin_url_and_type(ff_baseurl, project):
+    """
+    Try to find git or svn repository url for
+    a project hosted on a FusionForge instance.
+
+    Candidate urls are probed one after the other and the first one
+    that can be reached is returned.
+
+    Args:
+        ff_baseurl (str): host name of the forge (eg. gforge.inria.fr)
+        project (str): name of the project as used in repository urls
+
+    Returns:
+        A (url, type) tuple where type is 'svn' or 'git', or
+        (None, None) when no repository url could be found.
+    """
+    # standard FusionForge anonymous repository urls, in probing order
+    candidates = [
+        ('https://scm.%s/anonscm/svn/%s/' % (ff_baseurl, project), 'svn'),
+        ('https://scm.%s/anonscm/git/%s/%s.git' %
+         (ff_baseurl, project, project), 'git'),
+        # other possible FusionForge anonymous git repository url
+        ('https://%s/anonscm/git/%s/%s.git' %
+         (ff_baseurl, project, project), 'git'),
+    ]
+    for candidate_url, scm_type in candidates:
+        repo_url = _check_repo_url(candidate_url)
+        if repo_url:
+            return repo_url, scm_type
+
+    # some svn repositories may only be referenced by svn:// url type,
+    # use subvertpy to check their availability
+    svn_repo_url = 'svn://scm.%s/svn/%s' % (ff_baseurl, project)
+    try:
+        RemoteAccess(svn_repo_url)
+    except Exception:
+        # best effort probe: any failure means the url is not usable
+        pass
+    else:
+        return svn_repo_url, 'svn'
+
+    # other possible url schemes for git and svn repos (for instance
+    # those from https://sourcesup.renater.fr): the first label of the
+    # forge host name is replaced by the name of the scm service
+    baseurl_parts = ff_baseurl.split('.')
+    if len(baseurl_parts) > 2:
+        domain = '.'.join(baseurl_parts[1:])
+        alternatives = [
+            ('https://subversion.%s/%s/' % (domain, project), 'svn'),
+            ('https://svn.%s/%s/' % (domain, project), 'svn'),
+            ('https://git.%s/%s.git' % (domain, project), 'git'),
+        ]
+        for candidate_url, scm_type in alternatives:
+            repo_url = _check_repo_url(candidate_url)
+            if repo_url:
+                return repo_url, scm_type
+
+    return None, None
+
+
+def _can_load_svn_repo(svn_repo_url):
+    """
+    Check if a svn repository is valid and
+    contains at least one revision.
+
+    Args:
+        svn_repo_url (str): url of the svn repository to check
+
+    Returns:
+        True if a connection could be opened and the latest revision
+        number is greater than 0, False otherwise (including when the
+        connection or the query failed).
+    """
+    try:
+        return RemoteAccess(svn_repo_url).get_latest_revnum() > 0
+    except Exception:
+        # best effort check: an unreachable repository is not loadable
+        return False
+
+
+class FusionForgeLister(SWHListerHttpTransport, SWHListerBase):
+    """
+    Lister for FusionForge.
+    It takes a list of forge urls from the configuration
+    to list projects from (eg. sourcesup.renater.fr, adullact.net,
+    gforge.inria.fr) and create swh loading tasks.
+    To list the projects, the FusionForge SOAP web services are used.
+    """
+    MODEL = FusionForgeModel
+    PATH_TEMPLATE = None
+
+    # number of ingested projects between two database commits
+    _COMMIT_EVERY = 20
+
+    @property
+    def ADDITIONAL_CONFIG(self):  # noqa: N802
+        """
+        Parent configuration extended with the 'fusionforge_origins'
+        entry: a list of dicts holding the 'baseurl' of a forge and the
+        optional 'user' / 'password' used to log in its web services.
+        """
+        config = super().ADDITIONAL_CONFIG
+        config['fusionforge_origins'] = ('list',
+                                         [{'baseurl': 'gforge.inria.fr',
+                                           'user': None,
+                                           'password': None}])
+        return config
+
+    def __init__(self, lister_name='fusionforge', override_config=None):
+        # placeholder base url: project metadata are fetched through the
+        # SOAP client (see transport_request)
+        SWHListerHttpTransport.__init__(self, api_baseurl="bogus")
+        SWHListerBase.__init__(self, lister_name=lister_name,
+                               override_config=override_config)
+        self.soap_client = None
+        self.session = ''
+        # per-forge state, reset by run() for each configured forge
+        self.fusionforge_baseurl = None
+        self.repos_data = {}
+
+    def transport_quota_check(self, response):
+        """
+        Always return (False, 0): no quota handling is done here.
+        """
+        return False, 0
+
+    def transport_request(self, identifier):
+        """
+        Retrieve metadata associated to a FusionForge project
+        using the getGroupsByName SOAP service.
+
+        Args:
+            identifier (str): name of the project
+
+        Returns:
+            The first entry of the service answer, or None when the
+            call failed or returned nothing.
+        """
+        try:
+            return self.soap_client.getGroupsByName(
+                self.session, [identifier])[0]
+        except Exception:
+            logging.warning('Unable to get metadata for project %s '
+                            'from FusionForge %s',
+                            identifier, self.fusionforge_baseurl)
+            return None
+
+    def transport_response_simplified(self, response):
+        """
+        Process metadata for a FusionForge project and try to find url
+        for its code repository (only git and svn at the moment).
+        In that latter case, return a model in order to create a swh
+        loading task.
+
+        Returns:
+            A list holding one dict describing the project when a
+            repository url has been found, an empty list otherwise.
+        """
+        ret = []
+        if not response:
+            return ret
+
+        project_name = response.unix_group_name
+        repo = self.repos_data.get(project_name)
+        if repo is not None:
+            # the FusionForge SOAP API has the repositoryapi_* service:
+            # the code repository urls have already been put in cache
+            urls = repo.repository_urls
+            origin_url = urls[0] if urls else None
+            origin_type = repo.repository_type
+        else:
+            # otherwise we try to find the repository url
+            origin_url, origin_type = _try_to_get_origin_url_and_type(
+                self.fusionforge_baseurl, project_name)
+
+        # filter out invalid or empty svn repos
+        if (origin_url and origin_type == 'svn'
+                and not _can_load_svn_repo(origin_url)):
+            origin_url = None
+
+        if origin_url:
+            ret.append({'uid': project_name,
+                        'indexable': project_name,
+                        'name': project_name,
+                        'full_name': response.group_name,
+                        'html_url': response.homepage,
+                        'origin_url': origin_url,
+                        'origin_type': origin_type,
+                        'description': response.short_description
+                        })
+        else:
+            logging.info('Unable to find git or svn repository url '
+                         'for project %s from FusionForge %s',
+                         project_name, self.fusionforge_baseurl)
+        return ret
+
+    def _commit_and_renew_session(self):
+        """Commit the pending database changes and open a new session."""
+        self.db_session.commit()
+        self.db_session = self.mk_session()
+
+    def run(self):
+        """
+        Run the lister.
+
+        Each forge of the 'fusionforge_origins' configuration entry is
+        processed in turn. A forge is skipped when its SOAP client
+        cannot be initialized, when the login fails or when no project
+        could be listed because the public projects service failed.
+
+        Returns:
+            True if at least one forge has been processed,
+            False otherwise.
+        """
+        ret = False
+
+        for fusionforge_origin in self.config['fusionforge_origins']:
+            self.fusionforge_baseurl = fusionforge_origin['baseurl']
+            forge_wsdl_url = ('https://%s/soap/?wsdl=1' %
+                              self.fusionforge_baseurl)
+
+            # do not reuse the login session nor the repositories cache
+            # of the previously processed forge
+            self.session = ''
+            self.repos_data = {}
+
+            # initialize SOAP client
+            try:
+                self.soap_client = SoapClient(forge_wsdl_url).service
+            except Exception:
+                logging.error(
+                    'Unable to initialize web service client from url %s',
+                    forge_wsdl_url)
+                continue
+
+            # case where login is required to use the web services
+            user = fusionforge_origin.get('user', None)
+            password = fusionforge_origin.get('password', None)
+            if user and password:
+                try:
+                    self.session = self.soap_client.login(user, password)
+                except Exception:
+                    logging.error(
+                        'Failed to authenticate user %s on %s',
+                        user, self.fusionforge_baseurl)
+                    continue
+
+            projects = set()
+
+            # some forge (for instance adullact.net) have deployed a
+            # service to list code repositories, try to use it
+            try:
+                repos_list = self.soap_client.repositoryapi_repositoryList(
+                    self.session)
+
+                # create a cache holding repositories data for later use
+                # in the listing process
+                for repo in repos_list:
+                    project_name = repo.repository_id.split('/')[-1]
+                    projects.add(project_name)
+                    self.repos_data[project_name] = repo
+            except Exception:
+                # the service is optional: fall back on url probing
+                pass
+
+            # get all public project names on the forge
+            try:
+                public_projects = self.soap_client.getPublicProjectNames(
+                    self.session)
+            except Exception:
+                logging.error(
+                    'Unable to list public projects from FusionForge %s',
+                    self.fusionforge_baseurl)
+                if not projects:
+                    continue
+                public_projects = ()
+            projects |= set(public_projects or ())
+
+            # process the projects and try to find their code repository
+            for count, project in enumerate(projects, start=1):
+                self.ingest_data(project)
+                if count % self._COMMIT_EVERY == 0:
+                    self._commit_and_renew_session()
+
+            # commit the projects ingested since the last periodic commit
+            # NOTE(review): assumes ingest_data does not commit by itself
+            if len(projects) % self._COMMIT_EVERY:
+                self._commit_and_renew_session()
+
+            ret = True
+
+        return ret
+
+    def task_dict(self, origin_type, origin_url):
+        """
+        Create scheduler task for loading the found
+        repositories during projects listing.
+
+        Args:
+            origin_type (str): type of the repository ('git' or 'svn')
+            origin_url (str): url of the repository
+
+        Returns:
+            The task dict built by the parent class, turned into a
+            'oneshot' task and completed with the svn specific
+            arguments when needed.
+        """
+        tsk_dict = super().task_dict(origin_type, origin_url)
+        # set one shot task for the moment
+        tsk_dict['policy'] = 'oneshot'
+        # required parameters for the svn loader
+        if origin_type == 'svn':
+            kwargs = tsk_dict['arguments']['kwargs']
+            kwargs['destination_path'] = '/tmp'
+            kwargs['svn_url'] = origin_url
+        return tsk_dict
diff --git a/swh/lister/fusionforge/models.py b/swh/lister/fusionforge/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fusionforge/models.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2017 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String
+
+from swh.lister.core.models import ModelBase
+
+
+class FusionForgeModel(ModelBase):
+ """a FusionForge repository"""
+ # name of the database table holding the listed repositories
+ __tablename__ = 'fusionforge_repos'
+
+ # both columns are filled with the project unix name by the lister
+ uid = Column(String, primary_key=True)
+ indexable = Column(String, index=True)
diff --git a/swh/lister/fusionforge/tasks.py b/swh/lister/fusionforge/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/fusionforge/tasks.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2017 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.lister.core.tasks import ListerTaskBase # noqa
+
+from .lister import FusionForgeLister
+
+
+class FusionForgeListerTask(ListerTaskBase):
+    """Task running the FusionForge lister."""
+    task_queue = 'swh_lister_fusionforge'
+
+    def new_lister(self):
+        """Build the lister instance used by this task."""
+        return FusionForgeLister()
+
+    def run_task(self):
+        """Create a lister, run it and return its result."""
+        return self.new_lister().run()
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 7:01 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230934
Attached To
D267: [WIP] add first implementation of FusionForge lister
Event Timeline
Log In to Comment