Page Menu | Home | Software Heritage

No One | Temporary

diff --git a/swh/fetcher/googlecode/retrieve-google-archive-source.py b/swh/fetcher/googlecode/retrieve-google-archive-source.py
index 875ea7e..80cba27 100755
--- a/swh/fetcher/googlecode/retrieve-google-archive-source.py
+++ b/swh/fetcher/googlecode/retrieve-google-archive-source.py
@@ -1,34 +1,44 @@
# Copyright (C) 2015-2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import sys
import os
import requests
from . import utils
if __name__ == '__main__':
for archive in sys.stdin:
archive_gs = archive.rstrip()
- parent_dir, filename, url_meta, url_content = utils.transform(
+ data = utils.transform( # noqa
archive_gs)
+
+ parent_dir = data['parent_dir']
+ filename = data['filename']
+ url_project_archive_meta = data['url_project_archive_meta']
+ url_project_meta = data['url_project_meta']
os.makedirs(parent_dir, exist_ok=True)
project_name = os.path.basename(parent_dir)
filename = project_name + '-' + filename + '.json'
try:
- r = requests.get(url_meta)
+ r = requests.get(url_project_archive_meta)
+ except:
+ archive_size = ''
+ else:
+ archive_size = r.json()['size']
+
+ repo_type = ''
+ try:
+ r = requests.get(url_project_meta)
except:
- print('', filename)
+ repo_type = ''
else:
- print(r.json()['size'], filename)
+ repo_type = r.json()['repoType']
- # we store the project metadata
- filepath = os.path.join(parent_dir, filename)
- with open(filepath, 'wb') as f:
- f.write(r.text.encode('utf-8'))
+ print(filename, archive_size, repo_type)
diff --git a/swh/fetcher/googlecode/tests/test_utils.py b/swh/fetcher/googlecode/tests/test_utils.py
new file mode 100644
index 0000000..601bb0a
--- /dev/null
+++ b/swh/fetcher/googlecode/tests/test_utils.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+
+from nose.tools import istest
+
+from swh.fetcher.googlecode.utils import transform
+
+
+class TestUtils(unittest.TestCase):
+ @istest
+ def transform(self):
+ input = 'gs://google-code-archive-source/v2/apache-extras.org/abhishsinha-cassandra-jdbc-source/source-archive.zip' # noqa
+
+ actual_gs_transformed = transform(input)
+
+ self.assertEquals(
+ actual_gs_transformed, {
+ 'parent_dir':
+ 'v2/apache-extras.org/a/abhishsinha-cassandra-jdbc-source',
+
+ 'filename':
+ 'source-archive.zip',
+
+ 'url_project_archive_meta':
+ 'https://www.googleapis.com/storage/v1/b/google-code-archive-source/o/v2%2Fapache-extras.org%2Fabhishsinha-cassandra-jdbc-source%2Fsource-archive.zip', # noqa
+
+ 'url_project_meta':
+ 'https://storage.googleapis.com/google-code-archive/v2/apache-extras.org/abhishsinha-cassandra-jdbc-source/project.json' # noqa
+ })
diff --git a/swh/fetcher/googlecode/utils.py b/swh/fetcher/googlecode/utils.py
index a587be8..26eca8b 100644
--- a/swh/fetcher/googlecode/utils.py
+++ b/swh/fetcher/googlecode/utils.py
@@ -1,35 +1,48 @@
# Copyright (C) 2015-2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
def compute_destination_folder(path):
"""Given a path, compute a destination folder to which downloads the
remote files.
"""
parent_dir = os.path.dirname(path)
project_name = os.path.basename(parent_dir)
parent_ddir = os.path.dirname(parent_dir)
return os.path.join(parent_ddir, project_name[0], project_name)
-prefix_url_api = 'https://www.googleapis.com/storage/v1/b/google-code-archive-source/o' # noqa
+prefix_source_url_api = 'https://www.googleapis.com/storage/v1/b/google-code-archive-source/o' # noqa
+prefix_project_meta = 'https://storage.googleapis.com/google-code-archive'
def transform(url_gs):
- """Transform input gs:// url into:
+ """Transform input gs:// url into a dictionary with the following
+ information.
+
+ Returns:
+ Dict of the following form:
- destination folder
- filename
- - metadata url to fetch
- - actual content to fetch
+ - metadata archive url to fetch
+ - project metadata url to fetch
+
"""
url_gs = url_gs.replace('gs://google-code-archive-source/', '')
filename = os.path.basename(url_gs)
- url_meta = '%s/%s' % (prefix_url_api, url_gs.replace('/', '%2F'))
- parent_dir = compute_destination_folder(url_gs)
- return parent_dir, filename, url_meta, '%s?alt=media' % url_meta
+ project_name = os.path.dirname(url_gs)
+ url_meta = '%s/%s' % (prefix_source_url_api, url_gs.replace('/', '%2F'))
+ url_project_meta = '%s/%s/project.json' % (prefix_project_meta,
+ project_name)
+ return {
+ 'parent_dir': compute_destination_folder(url_gs),
+ 'filename': filename,
+ 'url_project_archive_meta': url_meta,
+ 'url_project_meta': url_project_meta
+ }

File Metadata

Mime Type
text/x-diff
Expires
Jun 4 2025, 7:53 PM (13 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3255061

Event Timeline