diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -70,6 +70,11 @@ origin: Origin loaded_snapshot_id: Optional[Sha1Git] + parent_origins: Optional[List[Origin]] + """If the given origin is a "forge fork" (i.e. created with the "Fork" button + of GitHub-like forges), :meth:`build_extrinsic_origin_metadata` sets this to + a list of origins it was forked from; closest parent first.""" + def __init__( self, storage: StorageInterface, @@ -122,6 +127,8 @@ self.save_data_path = save_data_path + self.parent_origins = None + @classmethod def from_config(cls, storage: Dict[str, Any], **config: Any): """Instantiate a loader from a configuration dict. @@ -432,6 +439,8 @@ credentials=self.metadata_fetcher_credentials, ) metadata.extend(metadata_fetcher.get_origin_metadata()) + if self.parent_origins is None: + self.parent_origins = metadata_fetcher.get_parent_origins() return metadata diff --git a/swh/loader/core/metadata_fetchers.py b/swh/loader/core/metadata_fetchers.py --- a/swh/loader/core/metadata_fetchers.py +++ b/swh/loader/core/metadata_fetchers.py @@ -33,6 +33,9 @@ def get_origin_metadata(self) -> List[RawExtrinsicMetadata]: ... + def get_parent_origins(self) -> List[Origin]: + ... 
+ @functools.lru_cache() def _fetchers() -> List[Type[MetadataFetcherProtocol]]: diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py --- a/swh/loader/core/tests/test_loader.py +++ b/swh/loader/core/tests/test_loader.py @@ -11,6 +11,7 @@ import pytest from swh.loader.core.loader import BaseLoader, DVCSLoader +from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol from swh.loader.exception import NotFound from swh.loader.tests import assert_last_visit_matches from swh.model.hashutil import hash_to_bytes @@ -25,6 +26,7 @@ import swh.storage.exc ORIGIN = Origin(url="some-url") +PARENT_ORIGIN = Origin(url="base-origin-url") METADATA_AUTHORITY = MetadataAuthority( type=MetadataAuthorityType.FORGE, url="http://example.org/" @@ -102,6 +104,31 @@ def get_origin_metadata(self): return [REMD] + def get_parent_origins(self): + return [] + + +class DummyMetadataFetcherWithFork: + SUPPORTED_LISTERS = {"fake-lister"} + + def __init__(self, origin, credentials, lister_name, lister_instance_name): + pass + + def get_origin_metadata(self): + return [REMD] + + def get_parent_origins(self): + return [PARENT_ORIGIN] + + +def test_types(): + assert isinstance( + DummyMetadataFetcher(None, None, None, None), MetadataFetcherProtocol + ) + assert isinstance( + DummyMetadataFetcherWithFork(None, None, None, None), MetadataFetcherProtocol + ) + def test_base_loader(swh_storage): loader = DummyBaseLoader(swh_storage) @@ -138,6 +165,7 @@ assert swh_storage.raw_extrinsic_metadata_get( ORIGIN.swhid(), METADATA_AUTHORITY ).results == [REMD] + assert loader.parent_origins == [] def test_base_loader_with_unknown_lister_name(swh_storage, mocker): @@ -158,6 +186,32 @@ swh_storage.raw_extrinsic_metadata_get(ORIGIN.swhid(), METADATA_AUTHORITY) +def test_base_loader_forked_origin(swh_storage, mocker): + fetcher_cls = MagicMock(wraps=DummyMetadataFetcherWithFork) + fetcher_cls.SUPPORTED_LISTERS = DummyMetadataFetcherWithFork.SUPPORTED_LISTERS + 
mocker.patch( + "swh.loader.core.metadata_fetchers._fetchers", return_value=[fetcher_cls] + ) + + loader = DummyBaseLoader( + swh_storage, lister_name="fake-lister", lister_instance_name="" + ) + result = loader.load() + assert result == {"status": "eventful"} + + fetcher_cls.assert_called_once() + fetcher_cls.assert_called_once_with( + origin=ORIGIN, + credentials={}, + lister_name="fake-lister", + lister_instance_name="", + ) + assert swh_storage.raw_extrinsic_metadata_get( + ORIGIN.swhid(), METADATA_AUTHORITY + ).results == [REMD] + assert loader.parent_origins == [PARENT_ORIGIN] + + def test_dvcs_loader(swh_storage): loader = DummyDVCSLoader(swh_storage) result = loader.load()