diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py --- a/swh/indexer/tests/conftest.py +++ b/swh/indexer/tests/conftest.py @@ -18,9 +18,6 @@ TASK_NAMES = ["revision_intrinsic_metadata", "origin_intrinsic_metadata"] -storage_config = {"cls": "pipeline", "steps": [{"cls": "validate"}, {"cls": "memory"},]} - - @pytest.fixture def indexer_scheduler(swh_scheduler): for taskname in TASK_NAMES: @@ -56,7 +53,7 @@ classes. """ - storage = get_storage(**storage_config) + storage = get_storage(cls="memory") fill_storage(storage) with patch("swh.storage.in_memory.InMemoryStorage") as storage_mock: storage_mock.return_value = storage diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -6,11 +6,10 @@ import json import unittest -import attr - from hypothesis import given, strategies, settings, HealthCheck from swh.model.hashutil import hash_to_bytes +from swh.model.model import Directory, DirectoryEntry, Revision from swh.indexer.codemeta import CODEMETA_TERMS from swh.indexer.metadata_dictionary import MAPPINGS @@ -20,6 +19,8 @@ from swh.indexer.metadata_detector import detect_metadata from swh.indexer.metadata import ContentMetadataIndexer, RevisionMetadataIndexer +from swh.indexer.tests.utils import REVISION, DIRECTORY2 + from .utils import ( BASE_TEST_CONFIG, fill_obj_storage, @@ -1105,32 +1106,31 @@ fill_storage(metadata_indexer.storage) tool = metadata_indexer.idx_storage.indexer_configuration_get( - {"tool_" + k: v for (k, v) in TRANSLATOR_TOOL.items()} + {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} ) assert tool is not None + rev = REVISION + assert rev.directory == DIRECTORY2.id metadata_indexer.idx_storage.content_metadata_add( [ { "indexer_configuration_id": tool["id"], - "id": b"cde", + "id": DIRECTORY2.entries[0].target, "metadata": YARN_PARSER_METADATA, } ] ) - sha1_gits = [ - hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), - ] - metadata_indexer.run(sha1_gits, "update-dups") + metadata_indexer.run([rev.id], "update-dups") results = list( - metadata_indexer.idx_storage.revision_intrinsic_metadata_get(sha1_gits) + metadata_indexer.idx_storage.revision_intrinsic_metadata_get([REVISION.id]) ) expected_results = [ { - "id": hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), + "id": rev.id, "tool": TRANSLATOR_TOOL, "metadata": YARN_PARSER_METADATA, "mappings": ["npm"], @@ -1141,7 +1141,7 @@ del result["tool"]["id"] # then - self.assertEqual(expected_results, results) + self.assertEqual(results, expected_results) def test_revision_metadata_indexer_single_root_dir(self): metadata_indexer = RevisionMetadataIndexer(config=REVISION_METADATA_CONFIG) @@ -1150,28 +1150,26 @@ # Add a parent directory, that is the only directory at the root # of the revision - rev_id = hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f") - rev = metadata_indexer.storage._revisions[rev_id] - subdir_id = rev.directory - rev = attr.evolve(rev, directory=b"123456") - metadata_indexer.storage.directory_add( - [ - { - "id": b"123456", - "entries": [ - { - "name": b"foobar-1.0.0", - "type": "dir", - "target": subdir_id, - "perms": 16384, - } - ], - } - ] + rev = REVISION + assert rev.directory == DIRECTORY2.id + + directory = Directory( + entries=( + DirectoryEntry( + name=b"foobar-1.0.0", type="dir", target=rev.directory, perms=16384, + ), + ), ) + assert directory.id is not None + metadata_indexer.storage.directory_add([directory]) + + new_rev_dict = {**rev.to_dict(), "directory": directory.id} + new_rev_dict.pop("id") + new_rev = Revision.from_dict(new_rev_dict) + metadata_indexer.storage.revision_add([new_rev]) tool = metadata_indexer.idx_storage.indexer_configuration_get( - {"tool_" + k: v for (k, v) in TRANSLATOR_TOOL.items()} + {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} ) assert tool is not None @@ -1179,24 +1177,21 @@ [ { "indexer_configuration_id": tool["id"], - "id": b"cde", + "id": DIRECTORY2.entries[0].target, "metadata": YARN_PARSER_METADATA, } ] ) - sha1_gits = [ - hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), - ] - metadata_indexer.run(sha1_gits, "update-dups") + metadata_indexer.run([new_rev.id], "update-dups") results = list( - metadata_indexer.idx_storage.revision_intrinsic_metadata_get(sha1_gits) + metadata_indexer.idx_storage.revision_intrinsic_metadata_get([new_rev.id]) ) expected_results = [ { - "id": hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), + "id": new_rev.id, "tool": TRANSLATOR_TOOL, "metadata": YARN_PARSER_METADATA, "mappings": ["npm"], @@ -1207,4 +1202,4 @@ del result["tool"]["id"] # then - self.assertEqual(expected_results, results) + self.assertEqual(results, expected_results) diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py --- a/swh/indexer/tests/test_origin_head.py +++ b/swh/indexer/tests/test_origin_head.py @@ -10,6 +10,8 @@ from swh.indexer.origin_head import OriginHeadIndexer from swh.indexer.tests.utils import BASE_TEST_CONFIG, fill_storage from swh.storage.utils import now +from swh.model.model import Origin, Snapshot, SnapshotBranch, TargetType + ORIGIN_HEAD_CONFIG = { **BASE_TEST_CONFIG, @@ -37,22 +39,17 @@ fill_storage(self.indexer.storage) def test_git(self): - self.indexer.run(["https://github.com/SoftwareHeritage/swh-storage"]) + origin_url = "https://github.com/SoftwareHeritage/swh-storage" + self.indexer.run([origin_url]) + rev_id = b"8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}\xac\xefrm" self.assertEqual( - self.indexer.results, - [ - { - "revision_id": b"8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{" - b"\xd7}\xac\xefrm", - "origin_url": "https://github.com/SoftwareHeritage/swh-storage", - } - ], + self.indexer.results, [{"revision_id": rev_id, "origin_url": origin_url,}], ) def test_git_partial_snapshot(self): """Checks partial snapshots are ignored.""" origin_url = "https://github.com/SoftwareHeritage/swh-core" - self.indexer.storage.origin_add([{"url": origin_url,}]) + self.indexer.storage.origin_add([Origin(url=origin_url)]) visit = self.indexer.storage.origin_visit_add( [ OriginVisit( @@ -64,13 +61,14 @@ )[0] self.indexer.storage.snapshot_add( [ - { - "id": b"foo", - "branches": { + Snapshot( + branches={ b"foo": None, - b"HEAD": {"target_type": "alias", "target": b"foo",}, + b"HEAD": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"foo", + ), }, - } + ), ] ) visit_status = OriginVisitStatus( @@ -85,15 +83,14 @@ self.assertEqual(self.indexer.results, []) def test_vcs_missing_snapshot(self): - self.indexer.storage.origin_add( - [{"url": "https://github.com/SoftwareHeritage/swh-indexer",}] - ) - self.indexer.run(["https://github.com/SoftwareHeritage/swh-indexer"]) + origin_url = "https://github.com/SoftwareHeritage/swh-indexer" + self.indexer.storage.origin_add([Origin(url=origin_url)]) + self.indexer.run([origin_url]) self.assertEqual(self.indexer.results, []) def test_pypi_missing_branch(self): origin_url = "https://pypi.org/project/abcdef/" - self.indexer.storage.origin_add([{"url": origin_url,}]) + self.indexer.storage.origin_add([Origin(url=origin_url,)]) visit = self.indexer.storage.origin_visit_add( [ OriginVisit( @@ -105,13 +102,14 @@ )[0] self.indexer.storage.snapshot_add( [ - { - "id": b"foo", - "branches": { + Snapshot( + branches={ b"foo": None, - b"HEAD": {"target_type": "alias", "target": b"foo",}, + b"HEAD": SnapshotBranch( + target_type=TargetType.ALIAS, target=b"foo", + ), }, - } + ) ] ) visit_status = OriginVisitStatus( @@ -126,66 +124,47 @@ self.assertEqual(self.indexer.results, []) def test_ftp(self): - self.indexer.run(["rsync://ftp.gnu.org/gnu/3dldf"]) + origin_url = "rsync://ftp.gnu.org/gnu/3dldf" + self.indexer.run([origin_url]) + rev_id = b"\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee\xcc\x1a\xb4`\x8c\x8by" self.assertEqual( - self.indexer.results, - [ - { - "revision_id": b"\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee" - b"\xcc\x1a\xb4`\x8c\x8by", - "origin_url": "rsync://ftp.gnu.org/gnu/3dldf", - } - ], + self.indexer.results, [{"revision_id": rev_id, "origin_url": origin_url,}], ) def test_ftp_missing_snapshot(self): - self.indexer.storage.origin_add([{"url": "rsync://ftp.gnu.org/gnu/foobar",}]) - self.indexer.run(["rsync://ftp.gnu.org/gnu/foobar"]) + origin_url = "rsync://ftp.gnu.org/gnu/foobar" + self.indexer.storage.origin_add([Origin(url=origin_url)]) + self.indexer.run([origin_url]) self.assertEqual(self.indexer.results, []) def test_deposit(self): - self.indexer.run(["https://forge.softwareheritage.org/source/jesuisgpl/"]) + origin_url = "https://forge.softwareheritage.org/source/jesuisgpl/" + self.indexer.storage.origin_add([Origin(url=origin_url)]) + self.indexer.run([origin_url]) + rev_id = b"\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{\xa6\xe9\x99\xb1\x9e]q\xeb" self.assertEqual( - self.indexer.results, - [ - { - "revision_id": b"\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{" - b"\xa6\xe9\x99\xb1\x9e]q\xeb", - "origin_url": "https://forge.softwareheritage.org/source/" - "jesuisgpl/", - } - ], + self.indexer.results, [{"revision_id": rev_id, "origin_url": origin_url,}], ) def test_deposit_missing_snapshot(self): - self.indexer.storage.origin_add( - [{"url": "https://forge.softwareheritage.org/source/foobar",}] - ) - self.indexer.run(["https://forge.softwareheritage.org/source/foobar"]) + origin_url = "https://forge.softwareheritage.org/source/foobar" + self.indexer.storage.origin_add([Origin(url=origin_url,)]) + self.indexer.run([origin_url]) self.assertEqual(self.indexer.results, []) def test_pypi(self): - self.indexer.run(["https://pypi.org/project/limnoria/"]) + origin_url = "https://pypi.org/project/limnoria/" + self.indexer.run([origin_url]) + + rev_id = b"\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8kA\x10\x9d\xc5\xfa2\xf8t" self.assertEqual( - self.indexer.results, - [ - { - "revision_id": b"\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k" - b"A\x10\x9d\xc5\xfa2\xf8t", - "origin_url": "https://pypi.org/project/limnoria/", - } - ], + self.indexer.results, [{"revision_id": rev_id, "origin_url": origin_url}], ) def test_svn(self): - self.indexer.run(["http://0-512-md.googlecode.com/svn/"]) + origin_url = "http://0-512-md.googlecode.com/svn/" + self.indexer.run([origin_url]) + rev_id = b"\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8\xc9\xad#.\x1bw=\x18" self.assertEqual( - self.indexer.results, - [ - { - "revision_id": b"\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8" - b"\xc9\xad#.\x1bw=\x18", - "origin_url": "http://0-512-md.googlecode.com/svn/", - } - ], + self.indexer.results, [{"revision_id": rev_id, "origin_url": origin_url,}], ) diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -5,22 +5,21 @@ from unittest.mock import patch -from swh.model.hashutil import hash_to_bytes - from swh.indexer.metadata import OriginMetadataIndexer -from .utils import YARN_PARSER_METADATA +from swh.model.model import Origin + +from .utils import YARN_PARSER_METADATA, REVISION from .test_metadata import REVISION_METADATA_CONFIG def test_origin_metadata_indexer(idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) - indexer.run(["https://github.com/librariesio/yarn-parser"]) - origin = "https://github.com/librariesio/yarn-parser" - rev_id = hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f") + indexer.run([origin]) + rev_id = REVISION.id rev_metadata = { "id": rev_id, "metadata": YARN_PARSER_METADATA, @@ -49,11 +48,10 @@ indexer.storage = storage indexer.idx_storage = idx_storage indexer.run(["https://github.com/librariesio/yarn-parser"]) - indexer.run(["https://github.com/librariesio/yarn-parser"] * 2) origin = "https://github.com/librariesio/yarn-parser" - rev_id = hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f") + rev_id = REVISION.id results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert len(results) == 1 @@ -63,8 +61,7 @@ def test_origin_metadata_indexer_missing_head(idx_storage, storage, obj_storage): - - storage.origin_add([{"url": "https://example.com"}]) + storage.origin_add([Origin(url="https://example.com")]) indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["https://example.com"]) @@ -79,54 +76,45 @@ idx_storage, storage, obj_storage ): - storage.origin_add([{"url": "https://example.com"}]) - - indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) - indexer.run(["https://example.com", "https://github.com/librariesio/yarn-parser"]) - origin1 = "https://example.com" origin2 = "https://github.com/librariesio/yarn-parser" - rev_id = hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f") + storage.origin_add([Origin(url=origin1)]) + indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) + indexer.run([origin1, origin2]) - rev_metadata = { - "id": rev_id, - "metadata": YARN_PARSER_METADATA, - "mappings": ["npm"], - } - origin_metadata = { - "id": origin2, - "from_revision": rev_id, - "metadata": YARN_PARSER_METADATA, - "mappings": ["npm"], - } + rev_id = REVISION.id results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) for result in results: del result["tool"] - assert results == [rev_metadata] + assert results == [ + {"id": rev_id, "metadata": YARN_PARSER_METADATA, "mappings": ["npm"],} + ] results = list( indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) ) for result in results: del result["tool"] - assert results == [origin_metadata] + assert results == [ + { + "id": origin2, + "from_revision": rev_id, + "metadata": YARN_PARSER_METADATA, + "mappings": ["npm"], + } + ] def test_origin_metadata_indexer_duplicate_revision(idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.storage = storage indexer.idx_storage = idx_storage - indexer.run( - [ - "https://github.com/librariesio/yarn-parser", - "https://github.com/librariesio/yarn-parser.git", - ] - ) - origin1 = "https://github.com/librariesio/yarn-parser" origin2 = "https://github.com/librariesio/yarn-parser.git" - rev_id = hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f") + indexer.run([origin1, origin2]) + + rev_id = REVISION.id results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert len(results) == 1 @@ -140,11 +128,11 @@ def test_origin_metadata_indexer_no_metadata_file(idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) + origin = "https://github.com/librariesio/yarn-parser" with patch("swh.indexer.metadata_dictionary.npm.NpmMapping.filename", b"foo.json"): - indexer.run(["https://github.com/librariesio/yarn-parser"]) + indexer.run([origin]) - origin = "https://github.com/librariesio/yarn-parser" - rev_id = hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f") + rev_id = REVISION.id results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results == [] @@ -156,15 +144,15 @@ def test_origin_metadata_indexer_no_metadata(idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) + origin = "https://github.com/librariesio/yarn-parser" with patch( "swh.indexer.metadata.RevisionMetadataIndexer" ".translate_revision_intrinsic_metadata", return_value=(["npm"], {"@context": "foo"}), ): - indexer.run(["https://github.com/librariesio/yarn-parser"]) + indexer.run([origin]) - origin = "https://github.com/librariesio/yarn-parser" - rev_id = hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f") + rev_id = REVISION.id results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results == [] @@ -176,15 +164,15 @@ def test_origin_metadata_indexer_error(idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) + origin = "https://github.com/librariesio/yarn-parser" with patch( "swh.indexer.metadata.RevisionMetadataIndexer" ".translate_revision_intrinsic_metadata", return_value=None, ): - indexer.run(["https://github.com/librariesio/yarn-parser"]) + indexer.run([origin]) - origin = "https://github.com/librariesio/yarn-parser" - rev_id = hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f") + rev_id = REVISION.id results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results == [] @@ -196,10 +184,10 @@ def test_origin_metadata_indexer_delete_metadata(idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) - indexer.run(["https://github.com/librariesio/yarn-parser"]) - origin = "https://github.com/librariesio/yarn-parser" - rev_id = hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f") + indexer.run([origin]) + + rev_id = REVISION.id results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results != [] @@ -208,7 +196,7 @@ assert results != [] with patch("swh.indexer.metadata_dictionary.npm.NpmMapping.filename", b"foo.json"): - indexer.run(["https://github.com/librariesio/yarn-parser"]) + indexer.run([origin]) results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) assert results == [] diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py --- a/swh/indexer/tests/utils.py +++ b/swh/indexer/tests/utils.py @@ -5,7 +5,6 @@ import abc import functools -import random from typing import Dict, Any import unittest @@ -13,175 +12,221 @@ from swh.model import hashutil from swh.model.hashutil import hash_to_bytes, hash_to_hex -from swh.model.model import OriginVisit, OriginVisitStatus +from swh.model.model import ( + Content, + Directory, + DirectoryEntry, + Origin, + OriginVisit, + OriginVisitStatus, + Person, + Revision, + RevisionType, + Snapshot, + SnapshotBranch, + TargetType, + Timestamp, + TimestampWithTimezone, +) from swh.storage.utils import now from swh.indexer.storage import INDEXER_CFG_KEY BASE_TEST_CONFIG: Dict[str, Dict[str, Any]] = { - "storage": {"cls": "pipeline", "steps": [{"cls": "validate"}, {"cls": "memory"},]}, + "storage": {"cls": "memory"}, "objstorage": {"cls": "memory", "args": {},}, INDEXER_CFG_KEY: {"cls": "memory", "args": {},}, } + +ORIGINS = [ + Origin(url="https://github.com/SoftwareHeritage/swh-storage"), + Origin(url="rsync://ftp.gnu.org/gnu/3dldf"), + Origin(url="https://forge.softwareheritage.org/source/jesuisgpl/"), + Origin(url="https://pypi.org/project/limnoria/"), + Origin(url="http://0-512-md.googlecode.com/svn/"), + Origin(url="https://github.com/librariesio/yarn-parser"), + Origin(url="https://github.com/librariesio/yarn-parser.git"), +] + + ORIGIN_VISITS = [ - {"type": "git", "url": "https://github.com/SoftwareHeritage/swh-storage"}, - {"type": "ftp", "url": "rsync://ftp.gnu.org/gnu/3dldf"}, - {"type": "deposit", "url": "https://forge.softwareheritage.org/source/jesuisgpl/"}, - {"type": "pypi", "url": "https://pypi.org/project/limnoria/"}, - {"type": "svn", "url": "http://0-512-md.googlecode.com/svn/"}, - {"type": "git", "url": "https://github.com/librariesio/yarn-parser"}, - {"type": "git", "url": "https://github.com/librariesio/yarn-parser.git"}, + {"type": "git", "origin": ORIGINS[0].url}, + {"type": "ftp", "origin": ORIGINS[1].url}, + {"type": "deposit", "origin": ORIGINS[2].url}, + {"type": "pypi", "origin": ORIGINS[3].url}, + {"type": "svn", "origin": ORIGINS[4].url}, + {"type": "git", "origin": ORIGINS[5].url}, + {"type": "git", "origin": ORIGINS[6].url}, ] + +DIRECTORY = Directory( + id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"), + entries=( + DirectoryEntry( + name=b"index.js", + type="file", + target=hash_to_bytes("01c9379dfc33803963d07c1ccc748d3fe4c96bb5"), + perms=0o100644, + ), + DirectoryEntry( + name=b"package.json", + type="file", + target=hash_to_bytes("26a9f72a7c87cc9205725cfd879f514ff4f3d8d5"), + perms=0o100644, + ), + DirectoryEntry( + name=b".github", + type="dir", + target=Directory(entries=()).id, + perms=0o040000, + ), + ), +) + +DIRECTORY2 = Directory( + id=b"\xf8zz\xa1\x12`<1$\xfav\xf9\x01\xfd5\x85F`\xf2\xb6", + entries=( + DirectoryEntry( + name=b"package.json", + type="file", + target=hash_to_bytes("f5305243b3ce7ef8dc864ebc73794da304025beb"), + perms=0o100644, + ), + ), +) + +REVISION = Revision( + id=hash_to_bytes("c6201cb1b9b9df9a7542f9665c3b5dfab85e9775"), + message=b"Improve search functionality", + author=Person( + name=b"Andrew Nesbitt", + fullname=b"Andrew Nesbitt ", + email=b"andrewnez@gmail.com", + ), + committer=Person( + name=b"Andrew Nesbitt", + fullname=b"Andrew Nesbitt ", + email=b"andrewnez@gmail.com", + ), + committer_date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1380883849, microseconds=0,), + offset=120, + negative_utc=False, + ), + type=RevisionType.GIT, + synthetic=False, + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1487596456, microseconds=0,), + offset=0, + negative_utc=False, + ), + directory=DIRECTORY2.id, + parents=(), +) + +REVISIONS = [REVISION] + SNAPSHOTS = [ - { - "origin": "https://github.com/SoftwareHeritage/swh-storage", - "branches": { - b"refs/heads/add-revision-origin-cache": { - "target": b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0' - b"s\xe7/\xe9l\x1e", - "target_type": "revision", - }, - b"refs/head/master": { - "target": b"8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}" b"\xac\xefrm", - "target_type": "revision", - }, - b"HEAD": {"target": b"refs/head/master", "target_type": "alias"}, - b"refs/tags/v0.0.103": { - "target": b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+' b"\x0f\xdd", - "target_type": "release", - }, - }, - }, - { - "origin": "rsync://ftp.gnu.org/gnu/3dldf", - "branches": { - b"3DLDF-1.1.4.tar.gz": { - "target": b"dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc" b'"G\x99\x11', - "target_type": "revision", - }, - b"3DLDF-2.0.2.tar.gz": { - "target": b"\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e=" - b"\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V", - "target_type": "revision", - }, - b"3DLDF-2.0.3-examples.tar.gz": { - "target": b"!H\x19\xc0\xee\x82-\x12F1\xbd\x97" - b"\xfe\xadZ\x80\x80\xc1\x83\xff", - "target_type": "revision", - }, - b"3DLDF-2.0.3.tar.gz": { - "target": b"\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee" - b"\xcc\x1a\xb4`\x8c\x8by", - "target_type": "revision", - }, - b"3DLDF-2.0.tar.gz": { - "target": b"F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G" b"\xd3\xd1m", - "target_type": "revision", - }, - }, - }, - { - "origin": "https://forge.softwareheritage.org/source/jesuisgpl/", - "branches": { - b"master": { - "target": b"\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{" - b"\xa6\xe9\x99\xb1\x9e]q\xeb", - "target_type": "revision", - } - }, - "id": b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV" b"\x1d\r ", - }, - { - "origin": "https://pypi.org/project/limnoria/", - "branches": { - b"HEAD": {"target": b"releases/2018.09.09", "target_type": "alias"}, - b"releases/2018.09.01": { - "target": b"<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d" - b"\xbb\xdfF\xfdw\xcf", - "target_type": "revision", - }, - b"releases/2018.09.09": { - "target": b"\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k" - b"A\x10\x9d\xc5\xfa2\xf8t", - "target_type": "revision", - }, - }, - "id": b"{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay" b"\x12\x9e\xd6\xb3", - }, - { - "origin": "http://0-512-md.googlecode.com/svn/", - "branches": { - b"master": { - "target": b"\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8" - b"\xc9\xad#.\x1bw=\x18", - "target_type": "revision", - } + Snapshot( + id=hash_to_bytes("a50fde72265343b7d28cecf6db20d98a81d21965"), + branches={ + b"refs/heads/add-revision-origin-cache": SnapshotBranch( + target=b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0s\xe7/\xe9l\x1e', + target_type=TargetType.REVISION, + ), + b"refs/head/master": SnapshotBranch( + target=b"8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}\xac\xefrm", + target_type=TargetType.REVISION, + ), + b"HEAD": SnapshotBranch( + target=b"refs/head/master", target_type=TargetType.ALIAS + ), + b"refs/tags/v0.0.103": SnapshotBranch( + target=b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+\x0f\xdd', + target_type=TargetType.RELEASE, + ), }, - "id": b"\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7" - b"\x05\xea\xb8\x1f\xc4H\xf4s", - }, - { - "origin": "https://github.com/librariesio/yarn-parser", - "branches": { - b"HEAD": { - "target": hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), - "target_type": "revision", - } + ), + Snapshot( + id=hash_to_bytes("2c67f69a416bca4e1f3fcd848c588fab88ad0642"), + branches={ + b"3DLDF-1.1.4.tar.gz": SnapshotBranch( + target=b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc"G\x99\x11', + target_type=TargetType.REVISION, + ), + b"3DLDF-2.0.2.tar.gz": SnapshotBranch( + target=b"\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e=\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V", # noqa + target_type=TargetType.REVISION, + ), + b"3DLDF-2.0.3-examples.tar.gz": SnapshotBranch( + target=b"!H\x19\xc0\xee\x82-\x12F1\xbd\x97\xfe\xadZ\x80\x80\xc1\x83\xff", # noqa + target_type=TargetType.REVISION, + ), + b"3DLDF-2.0.3.tar.gz": SnapshotBranch( + target=b"\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee\xcc\x1a\xb4`\x8c\x8by", # noqa + target_type=TargetType.REVISION, + ), + b"3DLDF-2.0.tar.gz": SnapshotBranch( + target=b"F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G\xd3\xd1m", + target_type=TargetType.REVISION, + ), }, - }, - { - "origin": "https://github.com/librariesio/yarn-parser.git", - "branches": { - b"HEAD": { - "target": hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), - "target_type": "revision", - } + ), + Snapshot( + id=hash_to_bytes("68c0d26104d47e278dd6be07ed61fafb561d0d20"), + branches={ + b"master": SnapshotBranch( + target=b"\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{\xa6\xe9\x99\xb1\x9e]q\xeb", # noqa + target_type=TargetType.REVISION, + ) }, - }, -] - - -REVISIONS = [ - { - "id": hash_to_bytes("8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f"), - "message": b"Improve search functionality", - "author": { - "name": b"Andrew Nesbitt", - "fullname": b"Andrew Nesbitt ", - "email": b"andrewnez@gmail.com", + ), + Snapshot( + id=hash_to_bytes("f255245269e15fc99d284affd79f766668de0b67"), + branches={ + b"HEAD": SnapshotBranch( + target=b"releases/2018.09.09", target_type=TargetType.ALIAS + ), + b"releases/2018.09.01": SnapshotBranch( + target=b"<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d\xbb\xdfF\xfdw\xcf", + target_type=TargetType.REVISION, + ), + b"releases/2018.09.09": SnapshotBranch( + target=b"\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8kA\x10\x9d\xc5\xfa2\xf8t", # noqa + target_type=TargetType.REVISION, + ), }, - "committer": { - "name": b"Andrew Nesbitt", - "fullname": b"Andrew Nesbitt ", - "email": b"andrewnez@gmail.com", + ), + Snapshot( + id=hash_to_bytes("a1a28c0ab387a8f9e0618cb705eab81fc448f473"), + branches={ + b"master": SnapshotBranch( + target=b"\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8\xc9\xad#.\x1bw=\x18", + target_type=TargetType.REVISION, + ) }, - "committer_date": { - "negative_utc": False, - "offset": 120, - "timestamp": {"microseconds": 0, "seconds": 1380883849,}, + ), + Snapshot( + id=hash_to_bytes("bb4fd3a836930ce629d912864319637040ff3040"), + branches={ + b"HEAD": SnapshotBranch( + target=REVISION.id, target_type=TargetType.REVISION, + ) }, - "type": "git", - "synthetic": False, - "date": { - "negative_utc": False, - "timestamp": {"seconds": 1487596456, "microseconds": 0,}, - "offset": 0, + ), + Snapshot( + id=hash_to_bytes("bb4fd3a836930ce629d912864319637040ff3040"), + branches={ + b"HEAD": SnapshotBranch( + target=REVISION.id, target_type=TargetType.REVISION, + ) }, - "directory": b"10", - "parents": (), - } + ), ] -DIRECTORY_ID = b"10" - -DIRECTORY_ENTRIES = [ - {"name": b"index.js", "type": "file", "target": b"abc", "perms": 33188,}, - {"name": b"package.json", "type": "file", "target": b"cde", "perms": 33188,}, - {"name": b".github", "type": "dir", "target": b"11", "perms": 16384,}, -] SHA1_TO_LICENSES = { "01c9379dfc33803963d07c1ccc748d3fe4c96bb5": ["GPL"], @@ -292,12 +337,10 @@ "a7ab314d8a11d2c93e3dcf528ca294e7b431c449": b""" """, "da39a3ee5e6b4b0d3255bfef95601890afd80709": b"", - # 626364 - hash_to_hex(b"bcd"): b"unimportant content for bcd", - # 636465 - hash_to_hex( - b"cde" - ): b""" + # was 626364 / b'bcd' + "e3e40fee6ff8a52f06c3b428bfe7c0ed2ef56e92": b"unimportant content for bcd", + # was 636465 / b'cde' now yarn-parser package.json + "f5305243b3ce7ef8dc864ebc73794da304025beb": b""" { "name": "yarn-parser", "version": "1.0.0", @@ -342,6 +385,7 @@ """, } + YARN_PARSER_METADATA = { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "url": "https://github.com/librariesio/yarn-parser#readme", @@ -503,43 +547,41 @@ def fill_storage(storage): - visit_types = {} - for visit in ORIGIN_VISITS: - storage.origin_add([{"url": visit["url"]}]) - visit_types[visit["url"]] = visit["type"] - for snap in SNAPSHOTS: - origin_url = snap["origin"] + storage.origin_add(ORIGINS) + storage.directory_add([DIRECTORY, DIRECTORY2]) + storage.revision_add(REVISIONS) + storage.snapshot_add(SNAPSHOTS) + + for visit, snapshot in zip(ORIGIN_VISITS, SNAPSHOTS): + assert snapshot.id is not None + visit = storage.origin_visit_add( - [OriginVisit(origin=origin_url, date=now(), type=visit_types[origin_url],)] + [OriginVisit(origin=visit["origin"], date=now(), type=visit["type"])] )[0] - snap_id = snap.get("id") or bytes([random.randint(0, 255) for _ in range(32)]) - storage.snapshot_add([{"id": snap_id, "branches": snap["branches"]}]) visit_status = OriginVisitStatus( - origin=origin_url, + origin=visit.origin, visit=visit.visit, date=now(), status="full", - snapshot=snap_id, + snapshot=snapshot.id, ) storage.origin_visit_status_add([visit_status]) - storage.revision_add(REVISIONS) contents = [] for (obj_id, content) in OBJ_STORAGE_DATA.items(): content_hashes = hashutil.MultiHash.from_data(content).digest() contents.append( - { - "data": content, - "length": len(content), - "status": "visible", - "sha1": hash_to_bytes(obj_id), - "sha1_git": hash_to_bytes(obj_id), - "sha256": content_hashes["sha256"], - "blake2s256": content_hashes["blake2s256"], - } + Content( + data=content, + length=len(content), + status="visible", + sha1=hash_to_bytes(obj_id), + sha1_git=hash_to_bytes(obj_id), + sha256=content_hashes["sha256"], + blake2s256=content_hashes["blake2s256"], + ) ) storage.content_add(contents) - storage.directory_add([{"id": DIRECTORY_ID, "entries": DIRECTORY_ENTRIES,}]) class CommonContentIndexerTest(metaclass=abc.ABCMeta):