# Patch summary (commentary placed ahead of the first "diff --git" header; not part of any hunk).
#
# Files touched:
#   swh/indexer/tests/test_origin_head.py      (hunk @@ -1,157 +1,154 @@)
#   swh/indexer/tests/test_origin_metadata.py  (hunk @@ -1,247 +1,245 @@)
#
# Changes carried by the hunks:
#   * Copyright end year bumped to 2020 in both files (from 2018 and 2019 respectively).
#   * The 'type' entry is dropped from the dicts passed to storage.origin_add() in
#     test_vcs_missing_snapshot, test_ftp_missing_snapshot, test_deposit_missing_snapshot,
#     test_origin_metadata_indexer_missing_head and
#     test_origin_metadata_indexer_partial_missing_head, leaving only 'url'.
#     The other origin_add_one() calls visible in the context already pass only 'url',
#     and the visit type is given separately via origin_visit_add(..., type=...).
#   * Everything else is unchanged context; the line-count deltas in the hunk headers
#     (-3 and -2) match the three and two removed 'type' lines.
#
# NOTE(review): the hunk bodies below sit on a handful of physical lines although the
# headers declare 157 and 247 source lines, i.e. the original line breaks appear to have
# been collapsed into spaces. Presumably this needs the original line structure (including
# blank context lines) restored from the upstream commit before it can be applied -- confirm.
diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py index 7c875d1..c8d7909 100644 --- a/swh/indexer/tests/test_origin_head.py +++ b/swh/indexer/tests/test_origin_head.py @@ -1,157 +1,154 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from swh.indexer.origin_head import OriginHeadIndexer from swh.indexer.tests.utils import ( BASE_TEST_CONFIG, fill_storage ) ORIGIN_HEAD_CONFIG = { **BASE_TEST_CONFIG, 'tools': { 'name': 'origin-metadata', 'version': '0.0.1', 'configuration': {}, }, 'tasks': { 'revision_intrinsic_metadata': None, 'origin_intrinsic_metadata': None, } } class OriginHeadTestIndexer(OriginHeadIndexer): """Specific indexer whose configuration is enough to satisfy the indexing tests. 
""" def parse_config_file(self, *args, **kwargs): return ORIGIN_HEAD_CONFIG def persist_index_computations(self, results, policy_update): self.results = results class OriginHead(unittest.TestCase): def setUp(self): self.indexer = OriginHeadTestIndexer() self.indexer.catch_exceptions = False fill_storage(self.indexer.storage) def test_git(self): self.indexer.run( ['https://github.com/SoftwareHeritage/swh-storage']) self.assertEqual(self.indexer.results, [{ 'revision_id': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{' b'\xd7}\xac\xefrm', 'origin_url': 'https://github.com/SoftwareHeritage/swh-storage'}]) def test_git_partial_snapshot(self): """Checks partial snapshots are ignored.""" origin_url = 'https://github.com/SoftwareHeritage/swh-core' self.indexer.storage.origin_add_one({ 'url': origin_url, }) visit = self.indexer.storage.origin_visit_add( origin_url, '2019-02-27', type='git', ) self.indexer.storage.snapshot_add([{ 'id': b'foo', 'branches': { b'foo': None, b'HEAD': { 'target_type': 'alias', 'target': b'foo', } } }]) self.indexer.storage.origin_visit_update( origin_url, visit.visit, status='partial', snapshot=b'foo') self.indexer.run([origin_url]) self.assertEqual(self.indexer.results, []) def test_vcs_missing_snapshot(self): self.indexer.storage.origin_add([{ - 'type': 'git', 'url': 'https://github.com/SoftwareHeritage/swh-indexer', }]) self.indexer.run( ['https://github.com/SoftwareHeritage/swh-indexer']) self.assertEqual(self.indexer.results, []) def test_pypi_missing_branch(self): origin_url = 'https://pypi.org/project/abcdef/' self.indexer.storage.origin_add_one({ 'url': origin_url, }) visit = self.indexer.storage.origin_visit_add( origin_url, '2019-02-27', type='pypi') self.indexer.storage.snapshot_add([{ 'id': b'foo', 'branches': { b'foo': None, b'HEAD': { 'target_type': 'alias', 'target': b'foo', } } }]) self.indexer.storage.origin_visit_update( origin_url, visit.visit, status='full', snapshot=b'foo') self.indexer.run(['https://pypi.org/project/abcdef/']) 
self.assertEqual(self.indexer.results, []) def test_ftp(self): self.indexer.run( ['rsync://ftp.gnu.org/gnu/3dldf']) self.assertEqual(self.indexer.results, [{ 'revision_id': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee' b'\xcc\x1a\xb4`\x8c\x8by', 'origin_url': 'rsync://ftp.gnu.org/gnu/3dldf'}]) def test_ftp_missing_snapshot(self): self.indexer.storage.origin_add([{ - 'type': 'ftp', 'url': 'rsync://ftp.gnu.org/gnu/foobar', }]) self.indexer.run( ['rsync://ftp.gnu.org/gnu/foobar']) self.assertEqual(self.indexer.results, []) def test_deposit(self): self.indexer.run( ['https://forge.softwareheritage.org/source/' 'jesuisgpl/']) self.assertEqual(self.indexer.results, [{ 'revision_id': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{' b'\xa6\xe9\x99\xb1\x9e]q\xeb', 'origin_url': 'https://forge.softwareheritage.org/source/' 'jesuisgpl/'}]) def test_deposit_missing_snapshot(self): self.indexer.storage.origin_add([{ - 'type': 'deposit', 'url': 'https://forge.softwareheritage.org/source/foobar', }]) self.indexer.run( ['https://forge.softwareheritage.org/source/foobar']) self.assertEqual(self.indexer.results, []) def test_pypi(self): self.indexer.run( ['https://pypi.org/project/limnoria/']) self.assertEqual(self.indexer.results, [{ 'revision_id': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k' b'A\x10\x9d\xc5\xfa2\xf8t', 'origin_url': 'https://pypi.org/project/limnoria/'}]) def test_svn(self): self.indexer.run( ['http://0-512-md.googlecode.com/svn/']) self.assertEqual(self.indexer.results, [{ 'revision_id': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8' b'\xc9\xad#.\x1bw=\x18', 'origin_url': 'http://0-512-md.googlecode.com/svn/'}]) diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py index b10b518..c7999f6 100644 --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -1,247 +1,245 @@ -# Copyright (C) 2018-2019 The Software Heritage developers +# Copyright (C) 2018-2020 The Software Heritage developers # See 
the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from unittest.mock import patch from swh.model.hashutil import hash_to_bytes from swh.indexer.metadata import OriginMetadataIndexer from .utils import YARN_PARSER_METADATA from .test_metadata import REVISION_METADATA_CONFIG def test_origin_metadata_indexer(idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["https://github.com/librariesio/yarn-parser"]) origin = 'https://github.com/librariesio/yarn-parser' rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') rev_metadata = { 'id': rev_id, 'metadata': YARN_PARSER_METADATA, 'mappings': ['npm'], } origin_metadata = { 'id': origin, 'from_revision': rev_id, 'metadata': YARN_PARSER_METADATA, 'mappings': ['npm'], } results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) for result in results: del result['tool'] assert results == [rev_metadata] results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ origin])) for result in results: del result['tool'] assert results == [origin_metadata] def test_origin_metadata_indexer_duplicate_origin( idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.storage = storage indexer.idx_storage = idx_storage indexer.run(["https://github.com/librariesio/yarn-parser"]) indexer.run(["https://github.com/librariesio/yarn-parser"]*2) origin = 'https://github.com/librariesio/yarn-parser' rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert len(results) == 1 results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ origin])) assert len(results) == 1 def test_origin_metadata_indexer_missing_head( idx_storage, storage, obj_storage): 
storage.origin_add([{ - 'type': 'git', 'url': 'https://example.com' }]) indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["https://example.com"]) origin = 'https://example.com' results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ origin])) assert results == [] def test_origin_metadata_indexer_partial_missing_head( idx_storage, storage, obj_storage): storage.origin_add([{ - 'type': 'git', 'url': 'https://example.com' }]) indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["https://example.com", "https://github.com/librariesio/yarn-parser"]) origin1 = 'https://example.com' origin2 = 'https://github.com/librariesio/yarn-parser' rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') rev_metadata = { 'id': rev_id, 'metadata': YARN_PARSER_METADATA, 'mappings': ['npm'], } origin_metadata = { 'id': origin2, 'from_revision': rev_id, 'metadata': YARN_PARSER_METADATA, 'mappings': ['npm'], } results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) for result in results: del result['tool'] assert results == [rev_metadata] results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ origin1, origin2])) for result in results: del result['tool'] assert results == [origin_metadata] def test_origin_metadata_indexer_duplicate_revision( idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.storage = storage indexer.idx_storage = idx_storage indexer.run(["https://github.com/librariesio/yarn-parser", "https://github.com/librariesio/yarn-parser.git"]) origin1 = 'https://github.com/librariesio/yarn-parser' origin2 = 'https://github.com/librariesio/yarn-parser.git' rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert len(results) == 1 results = list(indexer.idx_storage.origin_intrinsic_metadata_get( [origin1, origin2])) assert 
len(results) == 2 def test_origin_metadata_indexer_no_metadata_file( idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) with patch('swh.indexer.metadata_dictionary.npm.NpmMapping.filename', b'foo.json'): indexer.run(["https://github.com/librariesio/yarn-parser"]) origin = 'https://github.com/librariesio/yarn-parser' rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results == [] results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ origin])) assert results == [] def test_origin_metadata_indexer_no_metadata( idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) with patch('swh.indexer.metadata.RevisionMetadataIndexer' '.translate_revision_intrinsic_metadata', return_value=(['npm'], {'@context': 'foo'})): indexer.run(["https://github.com/librariesio/yarn-parser"]) origin = 'https://github.com/librariesio/yarn-parser' rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results == [] results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ origin])) assert results == [] def test_origin_metadata_indexer_error( idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) with patch('swh.indexer.metadata.RevisionMetadataIndexer' '.translate_revision_intrinsic_metadata', return_value=None): indexer.run(["https://github.com/librariesio/yarn-parser"]) origin = 'https://github.com/librariesio/yarn-parser' rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results == [] results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ origin])) assert results == [] def test_origin_metadata_indexer_delete_metadata( 
idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["https://github.com/librariesio/yarn-parser"]) origin = 'https://github.com/librariesio/yarn-parser' rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results != [] results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ origin])) assert results != [] with patch('swh.indexer.metadata_dictionary.npm.NpmMapping.filename', b'foo.json'): indexer.run(["https://github.com/librariesio/yarn-parser"]) results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results == [] results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ origin])) assert results == [] def test_origin_metadata_indexer_unknown_origin( idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) result = indexer.index_list(["https://unknown.org/foo"]) assert not result