diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py --- a/swh/web/api/utils.py +++ b/swh/web/api/utils.py @@ -6,6 +6,7 @@ import re from swh.web.common.utils import reverse, fmap +from swh.web.common.query import parse_hash def filter_endpoints(url_map, prefix_url_rule, blacklist=[]): @@ -158,25 +159,43 @@ return c -def enrich_content(content, top_url=False): +def enrich_content(content, top_url=False, query_string=None): """Enrich content with links to: - data_url: its raw data - filetype_url: its filetype information + - language_url: its programming language information + - license_url: its licensing information + + Args: + content: dict of data associated to a swh content object + top_url: whether or not to include the content url in + the enriched data + query_string: optional query string of type '<algo>:<hash>' + used when requesting the content, it acts as a hint + for picking the same hash method when computing + the urls listed above + + Returns: + An enriched content dict filled with additional urls """ - for h in ['sha1', 'sha1_git', 'sha256']: - if h in content: - q = '%s:%s' % (h, content[h]) - if top_url: - content['content_url'] = reverse('content', kwargs={'q': q}) - content['data_url'] = reverse('content-raw', kwargs={'q': q}) - content['filetype_url'] = reverse('content-filetype', - kwargs={'q': q}) - content['language_url'] = reverse('content-language', - kwargs={'q': q}) - content['license_url'] = reverse('content-license', - kwargs={'q': q}) - break + checksums = content + if 'checksums' in content: + checksums = content['checksums'] + hash_algo = 'sha1' + if query_string: + hash_algo = parse_hash(query_string)[0] + if hash_algo in checksums: + q = '%s:%s' % (hash_algo, checksums[hash_algo]) + if top_url: + content['content_url'] = reverse('content', kwargs={'q': q}) + content['data_url'] = reverse('content-raw', kwargs={'q': q}) + content['filetype_url'] = reverse('content-filetype', + kwargs={'q': q}) + content['language_url'] = 
reverse('content-language', + kwargs={'q': q}) + content['license_url'] = reverse('content-license', + kwargs={'q': q}) return content diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py --- a/swh/web/api/views/content.py +++ b/swh/web/api/views/content.py @@ -337,4 +337,4 @@ return _api_lookup( service.lookup_content, q, notfound_msg='Content with {} not found.'.format(q), - enrich_fn=utils.enrich_content) + enrich_fn=functools.partial(utils.enrich_content, query_string=q)) diff --git a/swh/web/common/converters.py b/swh/web/common/converters.py --- a/swh/web/common/converters.py +++ b/swh/web/common/converters.py @@ -11,6 +11,20 @@ from swh.web.api import utils +def _group_checksums(data): + """Groups checksums values computed from hash functions used in swh + and stored in data dict under a single entry 'checksums' + """ + if data: + checksums = {} + for hash in hashutil.ALGORITHMS: + if hash in data and data[hash]: + checksums[hash] = data[hash] + del data[hash] + if len(checksums) > 0: + data['checksums'] = checksums + + def from_swh(dict_swh, hashess={}, bytess={}, dates={}, blacklist={}, removables_if_empty={}, empty_dict={}, empty_list={}, convert={}, convert_fn=lambda x: x): @@ -130,6 +144,8 @@ else: new_dict[key] = value + _group_checksums(new_dict) + return new_dict @@ -308,10 +324,11 @@ """ return from_swh(dir_entry, - hashess={'dir_id', 'sha1_git', 'sha1', 'sha256', 'target'}, + hashess={'dir_id', 'sha1_git', 'sha1', 'sha256', + 'blake2s256', 'target'}, bytess={'name'}, removables_if_empty={ - 'sha1', 'sha1_git', 'sha256', 'status'}, + 'sha1', 'sha1_git', 'sha256', 'blake2s256', 'status'}, convert={'status'}, convert_fn=lambda v: 'absent' if v == 'hidden' else v) diff --git a/swh/web/common/service.py b/swh/web/common/service.py --- a/swh/web/common/service.py +++ b/swh/web/common/service.py @@ -621,7 +621,6 @@ if entity['type'] == 'dir': directory_entries = storage.directory_ls(entity['target']) or [] - return {'type': 'dir', 
'path': '.' if not dir_path else dir_path, 'revision': sha1_git, @@ -632,7 +631,6 @@ if with_data: c = _first_element(storage.content_get([content['sha1']])) content['data'] = c['data'] - return {'type': 'file', 'path': '.' if not dir_path else dir_path, 'revision': sha1_git, diff --git a/swh/web/tests/api/test_utils.py b/swh/web/tests/api/test_utils.py --- a/swh/web/tests/api/test_utils.py +++ b/swh/web/tests/api/test_utils.py @@ -29,6 +29,14 @@ dict(rule='/other2', methods=set([]), endpoint=None)] + self.sample_content_hashes = { + 'blake2s256': ('791e07fcea240ade6dccd0a9309141673' + 'c31242cae9c237cf3855e151abc78e9'), + 'sha1': 'dc2830a9e72f23c1dfebef4413003221baa5fb62', + 'sha1_git': 'fe95a46679d128ff167b7c55df5d02356c5a1ae1', + 'sha256': ('b5c7fe0536f44ef60c8780b6065d30bca74a5cd06' + 'd78a4a71ba1ad064770f0c9') + } @istest def filter_endpoints_1(self): @@ -449,41 +457,44 @@ @patch('swh.web.api.utils.reverse') @istest def enrich_content_with_hashes(self, mock_django_reverse): - for h in ['sha1', 'sha256', 'sha1_git']: + + for algo, hash in self.sample_content_hashes.items(): + + query_string = '%s:%s' % (algo, hash) + # given mock_django_reverse.side_effect = [ - '/api/content/%s:123/raw/' % h, - '/api/filetype/%s:123/' % h, - '/api/language/%s:123/' % h, - '/api/license/%s:123/' % h, + '/api/content/%s/raw/' % query_string, + '/api/filetype/%s/' % query_string, + '/api/language/%s/' % query_string, + '/api/license/%s/' % query_string ] # when enriched_content = utils.enrich_content( { - 'id': '123', - h: 'blahblah' - } + algo: hash, + }, + query_string=query_string ) # then self.assertEqual( enriched_content, { - 'id': '123', - h: 'blahblah', - 'data_url': '/api/content/%s:123/raw/' % h, - 'filetype_url': '/api/filetype/%s:123/' % h, - 'language_url': '/api/language/%s:123/' % h, - 'license_url': '/api/license/%s:123/' % h, + algo: hash, + 'data_url': '/api/content/%s/raw/' % query_string, + 'filetype_url': '/api/filetype/%s/' % query_string, + 'language_url': 
'/api/language/%s/' % query_string, + 'license_url': '/api/license/%s/' % query_string, } ) mock_django_reverse.assert_has_calls([ - call('content-raw', kwargs={'q': '%s:blahblah' % h}), - call('content-filetype', kwargs={'q': '%s:blahblah' % h}), - call('content-language', kwargs={'q': '%s:blahblah' % h}), - call('content-license', kwargs={'q': '%s:blahblah' % h}), + call('content-raw', kwargs={'q': query_string}), + call('content-filetype', kwargs={'q': query_string}), + call('content-language', kwargs={'q': query_string}), + call('content-license', kwargs={'q': query_string}), ]) mock_django_reverse.reset() @@ -492,45 +503,48 @@ @istest def enrich_content_with_hashes_and_top_level_url(self, mock_django_reverse): - for h in ['sha1', 'sha256', 'sha1_git']: + + for algo, hash in self.sample_content_hashes.items(): + + query_string = '%s:%s' % (algo, hash) + # given mock_django_reverse.side_effect = [ - '/api/content/%s:123/' % h, - '/api/content/%s:123/raw/' % h, - '/api/filetype/%s:123/' % h, - '/api/language/%s:123/' % h, - '/api/license/%s:123/' % h, + '/api/content/%s/' % query_string, + '/api/content/%s/raw/' % query_string, + '/api/filetype/%s/' % query_string, + '/api/language/%s/' % query_string, + '/api/license/%s/' % query_string, ] # when enriched_content = utils.enrich_content( { - 'id': '123', - h: 'blahblah' + algo: hash }, - top_url=True + top_url=True, + query_string=query_string ) # then self.assertEqual( enriched_content, { - 'id': '123', - h: 'blahblah', - 'content_url': '/api/content/%s:123/' % h, - 'data_url': '/api/content/%s:123/raw/' % h, - 'filetype_url': '/api/filetype/%s:123/' % h, - 'language_url': '/api/language/%s:123/' % h, - 'license_url': '/api/license/%s:123/' % h, + algo: hash, + 'content_url': '/api/content/%s/' % query_string, + 'data_url': '/api/content/%s/raw/' % query_string, + 'filetype_url': '/api/filetype/%s/' % query_string, + 'language_url': '/api/language/%s/' % query_string, + 'license_url': '/api/license/%s/' % 
query_string, } ) mock_django_reverse.assert_has_calls([ - call('content', kwargs={'q': '%s:blahblah' % h}), - call('content-raw', kwargs={'q': '%s:blahblah' % h}), - call('content-filetype', kwargs={'q': '%s:blahblah' % h}), - call('content-language', kwargs={'q': '%s:blahblah' % h}), - call('content-license', kwargs={'q': '%s:blahblah' % h}), + call('content', kwargs={'q': query_string}), + call('content-raw', kwargs={'q': query_string}), + call('content-filetype', kwargs={'q': query_string}), + call('content-language', kwargs={'q': query_string}), + call('content-license', kwargs={'q': query_string}), ]) mock_django_reverse.reset() diff --git a/swh/web/tests/api/views/test_content.py b/swh/web/tests/api/views/test_content.py --- a/swh/web/tests/api/views/test_content.py +++ b/swh/web/tests/api/views/test_content.py @@ -381,13 +381,17 @@ @patch('swh.web.api.views.content.service') @istest - def api_content_metadata(self, mock_service): + def test_api_content_metadata(self, mock_service): # given mock_service.lookup_content.return_value = { - 'sha1': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', - 'sha1_git': 'b4e8f472ffcb01a03875b26e462eb568739f6882', - 'sha256': '83c0e67cc80f60caf1fcbec2d84b0ccd7968b3be4735637006560' - 'cde9b067a4f', + 'checksums': { + 'blake2s256': '685395c5dc57cada459364f0946d3dd45bad5f' + 'cbabc1048edb44380f1d31d0aa', + 'sha1': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', + 'sha1_git': 'b4e8f472ffcb01a03875b26e462eb568739f6882', + 'sha256': '83c0e67cc80f60caf1fcbec2d84b0ccd7968b3be47' + '35637006560cde9b067a4f', + }, 'length': 17, 'status': 'visible' } @@ -399,6 +403,14 @@ self.assertEquals(rv.status_code, 200) self.assertEquals(rv['Content-Type'], 'application/json') self.assertEquals(rv.data, { + 'checksums': { + 'blake2s256': '685395c5dc57cada459364f0946d3dd45bad5f' + 'cbabc1048edb44380f1d31d0aa', + 'sha1': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', + 'sha1_git': 'b4e8f472ffcb01a03875b26e462eb568739f6882', + 'sha256': 
'83c0e67cc80f60caf1fcbec2d84b0ccd7968b3be47' + '35637006560cde9b067a4f', + }, 'data_url': '/api/1/content/' 'sha1:40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03/raw/', 'filetype_url': '/api/1/content/' @@ -407,10 +419,6 @@ 'sha1:40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03/language/', 'license_url': '/api/1/content/' 'sha1:40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03/license/', - 'sha1': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', - 'sha1_git': 'b4e8f472ffcb01a03875b26e462eb568739f6882', - 'sha256': '83c0e67cc80f60caf1fcbec2d84b0ccd7968b3be4735637006560c' - 'de9b067a4f', 'length': 17, 'status': 'visible' }) diff --git a/swh/web/tests/common/test_converters.py b/swh/web/tests/common/test_converters.py --- a/swh/web/tests/common/test_converters.py +++ b/swh/web/tests/common/test_converters.py @@ -643,12 +643,14 @@ # 'status' is filtered expected_content = { - 'sha1': '5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5', - 'sha256': '39007420ca5de7cb3cfc15196335507ee76c98930e7e0afa4d274' - '7d3bf96c926', - 'blake2s256': '49007420ca5de7cb3cfc15196335507ee76c98930e7e0afa4d2' - '747d3bf96c926', - 'sha1_git': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', + 'checksums': { + 'sha1': '5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5', + 'sha256': '39007420ca5de7cb3cfc15196335507ee76c98' + '930e7e0afa4d2747d3bf96c926', + 'blake2s256': '49007420ca5de7cb3cfc15196335507ee7' + '6c98930e7e0afa4d2747d3bf96c926', + 'sha1_git': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', + }, 'data': b'data in bytes', 'length': 10, 'status': 'absent', @@ -694,6 +696,9 @@ 'e76c98930e7e0afa4d2747d3bf96c926'), 'sha1_git': hashutil.hash_to_bytes( '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03'), + 'blake2s256': hashutil.hash_to_bytes( + '685395c5dc57cada459364f0946d3dd45bad5fcbab' + 'c1048edb44380f1d31d0aa'), 'target': hashutil.hash_to_bytes( '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03'), 'dir_id': hashutil.hash_to_bytes( @@ -704,10 +709,14 @@ } expected_dir_entries = { - 'sha1': '5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5', - 'sha256': 
'39007420ca5de7cb3cfc15196335507ee76c98930e7e0afa4d2747' - 'd3bf96c926', - 'sha1_git': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', + 'checksums': { + 'sha1': '5c6f0e2750f48fa0bd0c4cf5976ba0b9e02ebda5', + 'sha256': '39007420ca5de7cb3cfc15196335507ee76c98' + '930e7e0afa4d2747d3bf96c926', + 'sha1_git': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', + 'blake2s256': '685395c5dc57cada459364f0946d3dd45bad5f' + 'cbabc1048edb44380f1d31d0aa', + }, 'target': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', 'dir_id': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', 'name': 'bob', diff --git a/swh/web/tests/common/test_service.py b/swh/web/tests/common/test_service.py --- a/swh/web/tests/common/test_service.py +++ b/swh/web/tests/common/test_service.py @@ -18,12 +18,15 @@ class ServiceTestCase(unittest.TestCase): def setUp(self): - self.SHA1_SAMPLE = '18d8be353ed3480476f032475e7c233eff7371d5' + self.BLAKE2S256_SAMPLE = ('685395c5dc57cada459364f0946d3dd45b' + 'ad5fcbabc1048edb44380f1d31d0aa') + self.BLAKE2S256_SAMPLE_BIN = hash_to_bytes(self.BLAKE2S256_SAMPLE) + self.SHA1_SAMPLE = '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03' self.SHA1_SAMPLE_BIN = hash_to_bytes(self.SHA1_SAMPLE) - self.SHA256_SAMPLE = ('39007420ca5de7cb3cfc15196335507e' - 'e76c98930e7e0afa4d2747d3bf96c926') + self.SHA256_SAMPLE = ('8abb0aa566452620ecce816eecdef4792d77a' + '293ad8ea82a4d5ecb4d36f7e560') self.SHA256_SAMPLE_BIN = hash_to_bytes(self.SHA256_SAMPLE) - self.SHA1GIT_SAMPLE = '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03' + self.SHA1GIT_SAMPLE = '25d1a2e8f32937b0f498a5ca87f823d8df013c01' self.SHA1GIT_SAMPLE_BIN = hash_to_bytes(self.SHA1GIT_SAMPLE) self.DIRECTORY_ID = '7834ef7e7c357ce2af928115c6c6a42b7e2a44e6' self.DIRECTORY_ID_BIN = hash_to_bytes(self.DIRECTORY_ID) @@ -84,13 +87,17 @@ } self.SAMPLE_CONTENT = { - 'sha1': self.SHA1_SAMPLE, - 'sha256': self.SHA256_SAMPLE, - 'sha1_git': self.SHA1GIT_SAMPLE, + 'checksums': { + 'blake2s256': self.BLAKE2S256_SAMPLE, + 'sha1': self.SHA1_SAMPLE, + 'sha256': self.SHA256_SAMPLE, + 
'sha1_git': self.SHA1GIT_SAMPLE, + }, 'length': 190, 'status': 'absent' } self.SAMPLE_CONTENT_RAW = { + 'blake2s256': self.BLAKE2S256_SAMPLE_BIN, 'sha1': self.SHA1_SAMPLE_BIN, 'sha256': self.SHA256_SAMPLE_BIN, 'sha1_git': self.SHA1GIT_SAMPLE_BIN, @@ -1292,7 +1299,9 @@ expected_content = { 'status': 'visible', - 'sha1': hash_to_hex(b'content-sha1'), + 'checksums': { + 'sha1': hash_to_hex(b'content-sha1'), + }, 'data': b'some raw data' } @@ -1567,7 +1576,7 @@ # when actual_content = service.lookup_content_raw( - 'sha1:18d8be353ed3480476f032475e7c233eff7371d5') + 'sha1:' + self.SHA1_SAMPLE) # then self.assertIsNone(actual_content) @@ -1724,6 +1733,7 @@ 'sha1': self.SHA1_SAMPLE_BIN, 'sha256': self.SHA256_SAMPLE_BIN, 'sha1_git': self.SHA1GIT_SAMPLE_BIN, + 'blake2s256': self.BLAKE2S256_SAMPLE_BIN, 'target': hash_to_bytes( '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03'), 'dir_id': self.DIRECTORY_ID_BIN, @@ -1732,9 +1742,12 @@ }] expected_dir_entries = [{ - 'sha1': self.SHA1_SAMPLE, - 'sha256': self.SHA256_SAMPLE, - 'sha1_git': self.SHA1GIT_SAMPLE, + 'checksums': { + 'sha1': self.SHA1_SAMPLE, + 'sha256': self.SHA256_SAMPLE, + 'sha1_git': self.SHA1GIT_SAMPLE, + 'blake2s256': self.BLAKE2S256_SAMPLE + }, 'target': '40e71b8614fcd89ccd17ca2b1d9e66c5b00a6d03', 'dir_id': self.DIRECTORY_ID, 'name': 'bob',