Page MenuHomeSoftware Heritage

D7691.diff
No OneTemporary

D7691.diff

diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -70,6 +70,11 @@
origin: Origin
loaded_snapshot_id: Optional[Sha1Git]
+ parent_origins: Optional[List[Origin]]
+ """If the given origin is a "forge fork" (i.e. created with the "Fork" button
+ of GitHub-like forges), :meth:`build_extrinsic_origin_metadata` sets this to
+ a list of origins it was forked from; closest parent first."""
+
def __init__(
self,
storage: StorageInterface,
@@ -122,6 +127,8 @@
self.save_data_path = save_data_path
+ self.parent_origins = None
+
@classmethod
def from_config(cls, storage: Dict[str, Any], **config: Any):
"""Instantiate a loader from a configuration dict.
@@ -432,6 +439,8 @@
credentials=self.metadata_fetcher_credentials,
)
metadata.extend(metadata_fetcher.get_origin_metadata())
+ if self.parent_origins is None:
+ self.parent_origins = metadata_fetcher.get_parent_origins()
return metadata
diff --git a/swh/loader/core/metadata_fetchers.py b/swh/loader/core/metadata_fetchers.py
--- a/swh/loader/core/metadata_fetchers.py
+++ b/swh/loader/core/metadata_fetchers.py
@@ -33,6 +33,9 @@
def get_origin_metadata(self) -> List[RawExtrinsicMetadata]:
...
+ def get_parent_origins(self) -> List[Origin]:
+ ...
+
@functools.lru_cache()
def _fetchers() -> List[Type[MetadataFetcherProtocol]]:
diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py
--- a/swh/loader/core/tests/test_loader.py
+++ b/swh/loader/core/tests/test_loader.py
@@ -11,6 +11,7 @@
import pytest
from swh.loader.core.loader import BaseLoader, DVCSLoader
+from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol
from swh.loader.exception import NotFound
from swh.loader.tests import assert_last_visit_matches
from swh.model.hashutil import hash_to_bytes
@@ -25,6 +26,7 @@
import swh.storage.exc
ORIGIN = Origin(url="some-url")
+PARENT_ORIGIN = Origin(url="base-origin-url")
METADATA_AUTHORITY = MetadataAuthority(
type=MetadataAuthorityType.FORGE, url="http://example.org/"
@@ -102,6 +104,31 @@
def get_origin_metadata(self):
return [REMD]
+ def get_parent_origins(self):
+ return []
+
+
+class DummyMetadataFetcherWithFork:
+ SUPPORTED_LISTERS = {"fake-lister"}
+
+ def __init__(self, origin, credentials, lister_name, lister_instance_name):
+ pass
+
+ def get_origin_metadata(self):
+ return [REMD]
+
+ def get_parent_origins(self):
+ return [PARENT_ORIGIN]
+
+
+def test_types():
+ assert isinstance(
+ DummyMetadataFetcher(None, None, None, None), MetadataFetcherProtocol
+ )
+ assert isinstance(
+ DummyMetadataFetcherWithFork(None, None, None, None), MetadataFetcherProtocol
+ )
+
def test_base_loader(swh_storage):
loader = DummyBaseLoader(swh_storage)
@@ -138,6 +165,7 @@
assert swh_storage.raw_extrinsic_metadata_get(
ORIGIN.swhid(), METADATA_AUTHORITY
).results == [REMD]
+ assert loader.parent_origins == []
def test_base_loader_with_unknown_lister_name(swh_storage, mocker):
@@ -158,6 +186,32 @@
swh_storage.raw_extrinsic_metadata_get(ORIGIN.swhid(), METADATA_AUTHORITY)
+def test_base_loader_forked_origin(swh_storage, mocker):
+ fetcher_cls = MagicMock(wraps=DummyMetadataFetcherWithFork)
+ fetcher_cls.SUPPORTED_LISTERS = DummyMetadataFetcherWithFork.SUPPORTED_LISTERS
+ mocker.patch(
+ "swh.loader.core.metadata_fetchers._fetchers", return_value=[fetcher_cls]
+ )
+
+ loader = DummyBaseLoader(
+ swh_storage, lister_name="fake-lister", lister_instance_name=""
+ )
+ result = loader.load()
+ assert result == {"status": "eventful"}
+
+ fetcher_cls.assert_called_once()
+ fetcher_cls.assert_called_once_with(
+ origin=ORIGIN,
+ credentials={},
+ lister_name="fake-lister",
+ lister_instance_name="",
+ )
+ assert swh_storage.raw_extrinsic_metadata_get(
+ ORIGIN.swhid(), METADATA_AUTHORITY
+ ).results == [REMD]
+ assert loader.parent_origins == [PARENT_ORIGIN]
+
+
def test_dvcs_loader(swh_storage):
loader = DummyDVCSLoader(swh_storage)
result = loader.load()

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 17, 8:07 PM (1 d, 2 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225310

Event Timeline