Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9343543
D6268.id22694.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D6268.id22694.diff
View Options
diff --git a/swh/loader/mercurial/from_disk.py b/swh/loader/mercurial/from_disk.py
--- a/swh/loader/mercurial/from_disk.py
+++ b/swh/loader/mercurial/from_disk.py
@@ -187,11 +187,11 @@
# hg node id of the latest snapshot branch heads
# used to find what are the new revisions since last snapshot
- self._latest_heads: List[bytes] = []
+ self._latest_heads: List[HgNodeId] = []
# hg node ids of all the tags recorded on previous runs
# Used to compute which tags need to be added, even across incremental runs
# that might separate a changeset introducing a tag from its target.
- self._saved_tags: Set[bytes] = set()
+ self._saved_tags: Set[HgNodeId] = set()
self._load_status = "eventful"
# If set, will override the default value
@@ -267,15 +267,16 @@
extid.extid for extid in self._get_extids_for_targets(tags)
)
- def _get_extids_for_targets(self, targets: List[bytes]) -> List[ExtID]:
+ def _get_extids_for_targets(self, targets: List[HgNodeId]) -> List[ExtID]:
"""Get all Mercurial ExtIDs for the targets in the latest snapshot"""
- extids = [
- extid
- for extid in self.storage.extid_get_from_target(
- identifiers.ObjectType.REVISION, targets
- )
- if extid.extid_type == EXTID_TYPE and extid.extid_version == EXTID_VERSION
- ]
+ extids = []
+ for extid in self.storage.extid_get_from_target(
+ identifiers.ObjectType.REVISION, targets
+ ):
+ if extid.extid_type != EXTID_TYPE or extid.extid_version != EXTID_VERSION:
+ continue
+ extids.append(extid)
+ self._revision_nodeid_to_sha1git[extid.extid] = extid.target.object_id
if extids:
# Filter out dangling extids, we need to load their target again
@@ -289,16 +290,18 @@
]
return extids
- def _get_extids_for_hgnodes(self, hgnode_ids: List[bytes]) -> List[ExtID]:
+ def _get_extids_for_hgnodes(self, hgnode_ids: List[HgNodeId]) -> List[ExtID]:
"""Get all Mercurial ExtIDs for the mercurial nodes in the list which point to
a known revision.
"""
- extids = [
- extid
- for extid in self.storage.extid_get_from_extid(EXTID_TYPE, hgnode_ids)
- if extid.extid_version == EXTID_VERSION
- ]
+ extids = []
+ for extid in self.storage.extid_get_from_extid(EXTID_TYPE, hgnode_ids):
+ if extid.extid_version != EXTID_VERSION:
+ continue
+ extids.append(extid)
+ self._revision_nodeid_to_sha1git[extid.extid] = extid.target.object_id
+
if extids:
# Filter out dangling extids, we need to load their target again
revisions_missing = self.storage.revision_missing(
@@ -339,7 +342,7 @@
return False
- def _new_revs(self, heads: List[bytes]):
+ def _new_revs(self, heads: List[HgNodeId]):
"""Return unseen revisions. That is, filter out revisions that are not ancestors of
heads"""
assert self._repo is not None
@@ -406,13 +409,17 @@
ignored_revs.update(descendents)
if length_ingested_revs == 0:
- # The repository has nothing to ingest (either empty or broken repository)
self._load_status = "uneventful"
- return
+ # uneventful but still we need to make a snapshot if any, so we continue
+
+ self.log.debug(
+ "Revision ingested: %s (0 means no new revision)", length_ingested_revs
+ )
+ self.log.debug("Ignored revisions: %s", ignored_revs)
branching_info = hgutil.branching_info(repo, ignored_revs)
- tags_by_name: Dict[bytes, HgNodeId] = repo.tags()
+ tags_by_name: Dict[bytes, HgNodeId] = repo.tags()
snapshot_branches: Dict[bytes, SnapshotBranch] = {}
for tag_name, hg_nodeid in tags_by_name.items():
@@ -465,11 +472,11 @@
snapshot_branches[b"HEAD"] = SnapshotBranch(
target=default_branch_alias, target_type=TargetType.ALIAS,
)
- snapshot = Snapshot(branches=snapshot_branches)
- self.storage.snapshot_add([snapshot])
+ snapshot = Snapshot(branches=snapshot_branches)
+ self.storage.snapshot_add([snapshot])
+ self.loaded_snapshot_id = snapshot.id
self.flush()
- self.loaded_snapshot_id = snapshot.id
def load_status(self) -> Dict[str, str]:
"""Detailed loading status.
@@ -511,7 +518,7 @@
]
msg = "Expected 1 match from storage for hg node %r, got %d"
- assert len(from_storage) == 1, msg % (hg_nodeid.hex(), len(from_storage))
+ assert len(from_storage) == 1, msg % (hg_nodeid, len(from_storage))
return from_storage[0].target.object_id
def get_revision_parents(self, rev_ctx: hgutil.BaseContext) -> Tuple[Sha1Git, ...]:
diff --git a/swh/loader/mercurial/hgutil.py b/swh/loader/mercurial/hgutil.py
--- a/swh/loader/mercurial/hgutil.py
+++ b/swh/loader/mercurial/hgutil.py
@@ -10,7 +10,7 @@
import signal
import time
import traceback
-from typing import Dict, List, Mapping, NewType, Optional, Set
+from typing import Dict, List, Mapping, Optional, Set
from billiard import Process, Queue
@@ -19,7 +19,7 @@
import mercurial.ui # type: ignore
NULLID = mercurial.node.nullid
-HgNodeId = NewType("HgNodeId", bytes)
+HgNodeId = bytes
Repository = hg.localrepo
BaseContext = context.basectx
LRUCacheDict = util.lrucachedict
diff --git a/swh/loader/mercurial/tests/test_from_disk.py b/swh/loader/mercurial/tests/test_from_disk.py
--- a/swh/loader/mercurial/tests/test_from_disk.py
+++ b/swh/loader/mercurial/tests/test_from_disk.py
@@ -177,7 +177,7 @@
assert stats == expected_stats
loader2 = HgLoaderFromDisk(swh_storage, url=repo_url)
- assert loader2.load() == {"status": "uneventful"}
+ assert loader2.load() == {"status": "uneventful"} # nothing new happened
stats2 = get_stats(loader2.storage)
expected_stats2 = expected_stats.copy()
@@ -185,12 +185,13 @@
{"origin_visit": 1 + 1,}
)
assert stats2 == expected_stats2
- visit_status = assert_last_visit_matches(
- loader2.storage, repo_url, status="full", type="hg",
- )
- assert visit_status.snapshot is None
- # FIXME: Already seen objects are filtered out, so no new snapshot.
- # Current behavior but is it ok?
+ assert_last_visit_matches(
+ loader2.storage,
+ repo_url,
+ status="full",
+ type="hg",
+ snapshot=expected_snapshot.id,
+ ) # but we got a snapshot nonetheless
# This test has as been adapted from the historical `HgBundle20Loader` tests
@@ -308,7 +309,7 @@
def _partial_copy_storage(
- old_storage, origin_url: str, mechanism: str, copy_revisions: bool
+ old_storage, origin_url: str, mechanism: str, copy_revisions: bool = False
):
"""Create a new storage, and only copy ExtIDs or head revisions to it."""
new_storage = get_storage(cls="memory")
@@ -344,9 +345,8 @@
return new_storage
-@pytest.mark.parametrize("mechanism", ("extid", "same storage"))
def test_load_unchanged_repo_should_be_uneventful(
- swh_storage, datadir, tmp_path, mechanism
+ swh_storage, datadir, tmp_path,
):
"""Checks the loader can find which revisions it already loaded, using ExtIDs."""
archive_name = "hello"
@@ -367,45 +367,31 @@
"skipped_content": 0,
"snapshot": 1,
}
-
- old_storage = swh_storage
-
- # Create a new storage, and only copy ExtIDs or head revisions to it.
- # This should be enough for the loader to know revisions were already loaded
- new_storage = _partial_copy_storage(
- old_storage, repo_path, mechanism=mechanism, copy_revisions=True
+ visit_status = assert_last_visit_matches(
+ loader.storage, repo_path, type=RevisionType.MERCURIAL.value, status="full",
)
+ assert visit_status.snapshot is not None
# Create a new loader (to start with a clean slate, eg. remove the caches),
# with the new, partial, storage
- loader = HgLoaderFromDisk(new_storage, repo_path)
- assert loader.load() == {"status": "uneventful"}
+ loader2 = HgLoaderFromDisk(swh_storage, repo_path)
+ assert loader2.load() == {"status": "uneventful"}
- if mechanism == "same storage":
- # Should have all the objects
- assert get_stats(loader.storage) == {
- "content": 3,
- "directory": 3,
- "origin": 1,
- "origin_visit": 2,
- "release": 1,
- "revision": 3,
- "skipped_content": 0,
- "snapshot": 1,
- }
- else:
- # Should have only the objects we directly inserted from the test, plus
- # a new visit
- assert get_stats(loader.storage) == {
- "content": 0,
- "directory": 0,
- "origin": 1,
- "origin_visit": 2,
- "release": 0,
- "revision": 1,
- "skipped_content": 0,
- "snapshot": 1,
- }
+ # Should have all the objects
+ assert get_stats(loader.storage) == {
+ "content": 3,
+ "directory": 3,
+ "origin": 1,
+ "origin_visit": 2,
+ "release": 1,
+ "revision": 3,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
+ visit_status2 = assert_last_visit_matches(
+ loader2.storage, repo_path, type=RevisionType.MERCURIAL.value, status="full",
+ )
+ assert visit_status2.snapshot == visit_status.snapshot
def test_closed_branch_incremental(swh_storage, datadir, tmp_path):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 1:38 PM (6 d, 29 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228967
Attached To
D6268: mercurial: Build snapshot on visits
Event Timeline
Log In to Comment