Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8396475
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
View Options
diff --git a/swh/fetcher/googlecode/retrieve-google-archive-source.py b/swh/fetcher/googlecode/retrieve-google-archive-source.py
index 875ea7e..80cba27 100755
--- a/swh/fetcher/googlecode/retrieve-google-archive-source.py
+++ b/swh/fetcher/googlecode/retrieve-google-archive-source.py
@@ -1,34 +1,44 @@
# Copyright (C) 2015-2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import sys
import os
import requests
from . import utils
if __name__ == '__main__':
for archive in sys.stdin:
archive_gs = archive.rstrip()
- parent_dir, filename, url_meta, url_content = utils.transform(
+ data = utils.transform( # noqa
archive_gs)
+
+ parent_dir = data['parent_dir']
+ filename = data['filename']
+ url_project_archive_meta = data['url_project_archive_meta']
+ url_project_meta = data['url_project_meta']
os.makedirs(parent_dir, exist_ok=True)
project_name = os.path.basename(parent_dir)
filename = project_name + '-' + filename + '.json'
try:
- r = requests.get(url_meta)
+ r = requests.get(url_project_archive_meta)
+ except:
+ archive_size = ''
+ else:
+ archive_size = r.json()['size']
+
+ repo_type = ''
+ try:
+ r = requests.get(url_project_meta)
except:
- print('', filename)
+ repo_type = ''
else:
- print(r.json()['size'], filename)
+ repo_type = r.json()['repoType']
- # we store the project metadata
- filepath = os.path.join(parent_dir, filename)
- with open(filepath, 'wb') as f:
- f.write(r.text.encode('utf-8'))
+ print(filename, archive_size, repo_type)
diff --git a/swh/fetcher/googlecode/tests/test_utils.py b/swh/fetcher/googlecode/tests/test_utils.py
new file mode 100644
index 0000000..601bb0a
--- /dev/null
+++ b/swh/fetcher/googlecode/tests/test_utils.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+
+from nose.tools import istest
+
+from swh.fetcher.googlecode.utils import transform
+
+
+class TestUtils(unittest.TestCase):
+ @istest
+ def transform(self):
+ input = 'gs://google-code-archive-source/v2/apache-extras.org/abhishsinha-cassandra-jdbc-source/source-archive.zip' # noqa
+
+ actual_gs_transformed = transform(input)
+
+ self.assertEquals(
+ actual_gs_transformed, {
+ 'parent_dir':
+ 'v2/apache-extras.org/a/abhishsinha-cassandra-jdbc-source',
+
+ 'filename':
+ 'source-archive.zip',
+
+ 'url_project_archive_meta':
+ 'https://www.googleapis.com/storage/v1/b/google-code-archive-source/o/v2%2Fapache-extras.org%2Fabhishsinha-cassandra-jdbc-source%2Fsource-archive.zip', # noqa
+
+ 'url_project_meta':
+ 'https://storage.googleapis.com/google-code-archive/v2/apache-extras.org/abhishsinha-cassandra-jdbc-source/project.json' # noqa
+ })
diff --git a/swh/fetcher/googlecode/utils.py b/swh/fetcher/googlecode/utils.py
index a587be8..26eca8b 100644
--- a/swh/fetcher/googlecode/utils.py
+++ b/swh/fetcher/googlecode/utils.py
@@ -1,35 +1,48 @@
# Copyright (C) 2015-2016 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
def compute_destination_folder(path):
    """Given a path, compute the destination folder into which the
    remote files are downloaded.

    The project name is the basename of the path's parent directory.
    The returned folder is the grandparent directory joined with the
    first character of the project name and then the project name
    itself, e.g.
    'v2/apache-extras.org/abhishsinha-cassandra-jdbc-source/source-archive.zip'
    gives 'v2/apache-extras.org/a/abhishsinha-cassandra-jdbc-source'.

    Args:
        path: path of the remote file, of the form
            <prefix>/<project-name>/<filename>.

    Returns:
        The destination folder, as a string.

    Raises:
        IndexError: if the project name is empty (e.g. the path has no
            parent directory), since its first character is read.

    """
    parent_dir = os.path.dirname(path)
    project_name = os.path.basename(parent_dir)
    parent_ddir = os.path.dirname(parent_dir)
    # Insert an intermediate folder named after the project's first
    # character between the grandparent directory and the project folder.
    return os.path.join(parent_ddir, project_name[0], project_name)
-prefix_url_api = 'https://www.googleapis.com/storage/v1/b/google-code-archive-source/o' # noqa
+prefix_source_url_api = 'https://www.googleapis.com/storage/v1/b/google-code-archive-source/o' # noqa
+prefix_project_meta = 'https://storage.googleapis.com/google-code-archive'
def transform(url_gs):
- """Transform input gs:// url into:
+ """Transform input gs:// url into a dictionary with the following
+ information.
+
+ Returns:
+ Dict of the following form:
- destination folder
- filename
- - metadata url to fetch
- - actual content to fetch
+ - metadata archive url to fetch
+ - project metadata url to fetch
+
"""
url_gs = url_gs.replace('gs://google-code-archive-source/', '')
filename = os.path.basename(url_gs)
- url_meta = '%s/%s' % (prefix_url_api, url_gs.replace('/', '%2F'))
- parent_dir = compute_destination_folder(url_gs)
- return parent_dir, filename, url_meta, '%s?alt=media' % url_meta
+ project_name = os.path.dirname(url_gs)
+ url_meta = '%s/%s' % (prefix_source_url_api, url_gs.replace('/', '%2F'))
+ url_project_meta = '%s/%s/project.json' % (prefix_project_meta,
+ project_name)
+ return {
+ 'parent_dir': compute_destination_folder(url_gs),
+ 'filename': filename,
+ 'url_project_archive_meta': url_meta,
+ 'url_project_meta': url_project_meta
+ }
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jun 4 2025, 7:53 PM (13 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3255061
Attached To
R61 Fetcher Googlecode
Event Timeline
Log In to Comment