Page MenuHomeSoftware Heritage

D6268.id22694.diff
No OneTemporary

D6268.id22694.diff

diff --git a/swh/loader/mercurial/from_disk.py b/swh/loader/mercurial/from_disk.py
--- a/swh/loader/mercurial/from_disk.py
+++ b/swh/loader/mercurial/from_disk.py
@@ -187,11 +187,11 @@
# hg node id of the latest snapshot branch heads
# used to find what are the new revisions since last snapshot
- self._latest_heads: List[bytes] = []
+ self._latest_heads: List[HgNodeId] = []
# hg node ids of all the tags recorded on previous runs
# Used to compute which tags need to be added, even across incremental runs
# that might separate a changeset introducing a tag from its target.
- self._saved_tags: Set[bytes] = set()
+ self._saved_tags: Set[HgNodeId] = set()
self._load_status = "eventful"
# If set, will override the default value
@@ -267,15 +267,16 @@
extid.extid for extid in self._get_extids_for_targets(tags)
)
- def _get_extids_for_targets(self, targets: List[bytes]) -> List[ExtID]:
+ def _get_extids_for_targets(self, targets: List[HgNodeId]) -> List[ExtID]:
"""Get all Mercurial ExtIDs for the targets in the latest snapshot"""
- extids = [
- extid
- for extid in self.storage.extid_get_from_target(
- identifiers.ObjectType.REVISION, targets
- )
- if extid.extid_type == EXTID_TYPE and extid.extid_version == EXTID_VERSION
- ]
+ extids = []
+ for extid in self.storage.extid_get_from_target(
+ identifiers.ObjectType.REVISION, targets
+ ):
+ if extid.extid_type != EXTID_TYPE or extid.extid_version != EXTID_VERSION:
+ continue
+ extids.append(extid)
+ self._revision_nodeid_to_sha1git[extid.extid] = extid.target.object_id
if extids:
# Filter out dangling extids, we need to load their target again
@@ -289,16 +290,18 @@
]
return extids
- def _get_extids_for_hgnodes(self, hgnode_ids: List[bytes]) -> List[ExtID]:
+ def _get_extids_for_hgnodes(self, hgnode_ids: List[HgNodeId]) -> List[ExtID]:
"""Get all Mercurial ExtIDs for the mercurial nodes in the list which point to
a known revision.
"""
- extids = [
- extid
- for extid in self.storage.extid_get_from_extid(EXTID_TYPE, hgnode_ids)
- if extid.extid_version == EXTID_VERSION
- ]
+ extids = []
+ for extid in self.storage.extid_get_from_extid(EXTID_TYPE, hgnode_ids):
+ if extid.extid_version != EXTID_VERSION:
+ continue
+ extids.append(extid)
+ self._revision_nodeid_to_sha1git[extid.extid] = extid.target.object_id
+
if extids:
# Filter out dangling extids, we need to load their target again
revisions_missing = self.storage.revision_missing(
@@ -339,7 +342,7 @@
return False
- def _new_revs(self, heads: List[bytes]):
+ def _new_revs(self, heads: List[HgNodeId]):
"""Return unseen revisions. That is, filter out revisions that are not ancestors of
heads"""
assert self._repo is not None
@@ -406,13 +409,17 @@
ignored_revs.update(descendents)
if length_ingested_revs == 0:
- # The repository has nothing to ingest (either empty or broken repository)
self._load_status = "uneventful"
- return
+            # uneventful, but we still need to create a snapshot if there is one, so we continue
+
+ self.log.debug(
+ "Revision ingested: %s (0 means no new revision)", length_ingested_revs
+ )
+ self.log.debug("Ignored revisions: %s", ignored_revs)
branching_info = hgutil.branching_info(repo, ignored_revs)
- tags_by_name: Dict[bytes, HgNodeId] = repo.tags()
+ tags_by_name: Dict[bytes, HgNodeId] = repo.tags()
snapshot_branches: Dict[bytes, SnapshotBranch] = {}
for tag_name, hg_nodeid in tags_by_name.items():
@@ -465,11 +472,11 @@
snapshot_branches[b"HEAD"] = SnapshotBranch(
target=default_branch_alias, target_type=TargetType.ALIAS,
)
- snapshot = Snapshot(branches=snapshot_branches)
- self.storage.snapshot_add([snapshot])
+ snapshot = Snapshot(branches=snapshot_branches)
+ self.storage.snapshot_add([snapshot])
+ self.loaded_snapshot_id = snapshot.id
self.flush()
- self.loaded_snapshot_id = snapshot.id
def load_status(self) -> Dict[str, str]:
"""Detailed loading status.
@@ -511,7 +518,7 @@
]
msg = "Expected 1 match from storage for hg node %r, got %d"
- assert len(from_storage) == 1, msg % (hg_nodeid.hex(), len(from_storage))
+ assert len(from_storage) == 1, msg % (hg_nodeid, len(from_storage))
return from_storage[0].target.object_id
def get_revision_parents(self, rev_ctx: hgutil.BaseContext) -> Tuple[Sha1Git, ...]:
diff --git a/swh/loader/mercurial/hgutil.py b/swh/loader/mercurial/hgutil.py
--- a/swh/loader/mercurial/hgutil.py
+++ b/swh/loader/mercurial/hgutil.py
@@ -10,7 +10,7 @@
import signal
import time
import traceback
-from typing import Dict, List, Mapping, NewType, Optional, Set
+from typing import Dict, List, Mapping, Optional, Set
from billiard import Process, Queue
@@ -19,7 +19,7 @@
import mercurial.ui # type: ignore
NULLID = mercurial.node.nullid
-HgNodeId = NewType("HgNodeId", bytes)
+HgNodeId = bytes
Repository = hg.localrepo
BaseContext = context.basectx
LRUCacheDict = util.lrucachedict
diff --git a/swh/loader/mercurial/tests/test_from_disk.py b/swh/loader/mercurial/tests/test_from_disk.py
--- a/swh/loader/mercurial/tests/test_from_disk.py
+++ b/swh/loader/mercurial/tests/test_from_disk.py
@@ -177,7 +177,7 @@
assert stats == expected_stats
loader2 = HgLoaderFromDisk(swh_storage, url=repo_url)
- assert loader2.load() == {"status": "uneventful"}
+ assert loader2.load() == {"status": "uneventful"} # nothing new happened
stats2 = get_stats(loader2.storage)
expected_stats2 = expected_stats.copy()
@@ -185,12 +185,13 @@
{"origin_visit": 1 + 1,}
)
assert stats2 == expected_stats2
- visit_status = assert_last_visit_matches(
- loader2.storage, repo_url, status="full", type="hg",
- )
- assert visit_status.snapshot is None
- # FIXME: Already seen objects are filtered out, so no new snapshot.
- # Current behavior but is it ok?
+ assert_last_visit_matches(
+ loader2.storage,
+ repo_url,
+ status="full",
+ type="hg",
+ snapshot=expected_snapshot.id,
+ ) # but we got a snapshot nonetheless
# This test has as been adapted from the historical `HgBundle20Loader` tests
@@ -308,7 +309,7 @@
def _partial_copy_storage(
- old_storage, origin_url: str, mechanism: str, copy_revisions: bool
+ old_storage, origin_url: str, mechanism: str, copy_revisions: bool = False
):
"""Create a new storage, and only copy ExtIDs or head revisions to it."""
new_storage = get_storage(cls="memory")
@@ -344,9 +345,8 @@
return new_storage
-@pytest.mark.parametrize("mechanism", ("extid", "same storage"))
def test_load_unchanged_repo_should_be_uneventful(
- swh_storage, datadir, tmp_path, mechanism
+ swh_storage, datadir, tmp_path,
):
"""Checks the loader can find which revisions it already loaded, using ExtIDs."""
archive_name = "hello"
@@ -367,45 +367,31 @@
"skipped_content": 0,
"snapshot": 1,
}
-
- old_storage = swh_storage
-
- # Create a new storage, and only copy ExtIDs or head revisions to it.
- # This should be enough for the loader to know revisions were already loaded
- new_storage = _partial_copy_storage(
- old_storage, repo_path, mechanism=mechanism, copy_revisions=True
+ visit_status = assert_last_visit_matches(
+ loader.storage, repo_path, type=RevisionType.MERCURIAL.value, status="full",
)
+ assert visit_status.snapshot is not None
# Create a new loader (to start with a clean slate, eg. remove the caches),
# with the new, partial, storage
- loader = HgLoaderFromDisk(new_storage, repo_path)
- assert loader.load() == {"status": "uneventful"}
+ loader2 = HgLoaderFromDisk(swh_storage, repo_path)
+ assert loader2.load() == {"status": "uneventful"}
- if mechanism == "same storage":
- # Should have all the objects
- assert get_stats(loader.storage) == {
- "content": 3,
- "directory": 3,
- "origin": 1,
- "origin_visit": 2,
- "release": 1,
- "revision": 3,
- "skipped_content": 0,
- "snapshot": 1,
- }
- else:
- # Should have only the objects we directly inserted from the test, plus
- # a new visit
- assert get_stats(loader.storage) == {
- "content": 0,
- "directory": 0,
- "origin": 1,
- "origin_visit": 2,
- "release": 0,
- "revision": 1,
- "skipped_content": 0,
- "snapshot": 1,
- }
+ # Should have all the objects
+ assert get_stats(loader.storage) == {
+ "content": 3,
+ "directory": 3,
+ "origin": 1,
+ "origin_visit": 2,
+ "release": 1,
+ "revision": 3,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
+ visit_status2 = assert_last_visit_matches(
+ loader2.storage, repo_path, type=RevisionType.MERCURIAL.value, status="full",
+ )
+ assert visit_status2.snapshot == visit_status.snapshot
def test_closed_branch_incremental(swh_storage, datadir, tmp_path):

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 1:38 PM (6 d, 29 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228967

Event Timeline