Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066146
D277.id933.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D277.id933.diff
View Options
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -198,7 +198,7 @@
ValueError if the git_type is unexpected.
"""
- git_object_types = {'blob', 'tree', 'commit', 'tag'}
+ git_object_types = {'blob', 'tree', 'commit', 'tag', 'snapshot'}
if git_type not in git_object_types:
raise ValueError('Unexpected git object type %s, expected one of %s' %
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -499,3 +499,88 @@
components.extend([b'\n', release['message']])
return identifier_to_str(hash_git_data(b''.join(components), 'tag'))
+
+
+def snapshot_identifier(snapshot, *, ignore_unresolved=False):
+ """Return the intrinsic identifier for a snapshot.
+
+ Snapshots are a set of named branches, which are pointers to objects at any
+ level of the Software Heritage DAG.
+
+ As well as pointing to other objects in the Software Heritage DAG, branches
+ can also be *symbolic*, in which case their target is the name of another
+ branch in the same snapshot, or *dangling*, in which case the target is
+ unknown (and represented by the ``None`` value).
+
+ A snapshot identifier is a salted sha1 (using the git hashing algorithm
+ with the ``snapshot`` object type) of a manifest following the algorithm:
+
+ 1. Branches are sorted using the name as key, in bytes order.
+
+ 2. For each branch, the following bytes are output:
+
+ - the type of the branch target:
+
+ - ``content``, ``directory``, ``revision``, ``release`` or ``snapshot``
+ for the corresponding entries in the DAG;
+ - ``symbolic`` for branches referencing another branch;
+ - ``dangling`` for dangling branches
+
+ - an ascii space (``\\x20``)
+ - the branch name (as raw bytes)
+ - a null byte (``\\x00``)
+ - the length of the target identifier, as an ascii-encoded decimal number
+ (``20`` for current intrinsic identifiers, ``0`` for dangling
+ branches, the length of the target branch name for symbolic branches)
+ - a colon (``:``)
+ - the identifier of the target object pointed at by the branch,
+ stored in the 'target' member:
+
+ - for contents: their *sha1_git*
+ - for directories, revisions, releases or snapshots: their intrinsic
+ identifier
+ - for symbolic branches, the name of the target branch (as raw bytes)
+ - for dangling branches, the empty string
+
+ Note that, akin to directory manifests, there is no separator between
+ entries. Because of symbolic branches, identifiers are of arbitrary
+ length but are length-encoded to avoid ambiguity.
+
+ Args:
+ snapshot (dict): the snapshot of which to compute the identifier. A
+ single entry is needed, ``'branches'``, which is itself a :class:`dict`
+ mapping each branch to its target
+ ignore_unresolved (bool): if `True`, ignore unresolved symbolic branches.
+
+ Returns:
+ str: the intrinsic identifier for `snapshot`
+
+ """
+
+ unresolved = []
+ lines = []
+
+ for name, target in sorted(snapshot['branches'].items()):
+ if not target:
+ target_type = b'dangling'
+ target_id = b''
+ elif target['target_type'] == 'symbolic':
+ target_type = b'symbolic'
+ target_id = target['target']
+ if target_id not in snapshot['branches'] or target_id == name:
+ unresolved.append((name, target_id))
+ else:
+ target_type = target['target_type'].encode()
+ target_id = identifier_to_bytes(target['target'])
+
+ lines.extend([
+ target_type, b'\x20', name, b'\x00',
+ ('%d:' % len(target_id)).encode(), target_id,
+ ])
+
+ if unresolved and not ignore_unresolved:
+ raise ValueError('Symbolic references unresolved: %s' %
+ ', '.join('%s -> %s' % (name, target)
+ for name, target in unresolved))
+
+ return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot'))
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -679,3 +679,92 @@
identifiers.release_identifier(self.release_newline_in_author),
identifiers.identifier_to_str(self.release_newline_in_author['id'])
)
+
+
+class SnapshotIdentifier(unittest.TestCase):
+ def setUp(self):
+ super().setUp()
+
+ self.empty = {
+ 'id': '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e',
+ 'branches': {},
+ }
+
+ self.dangling_branch = {
+ 'id': 'c84502e821eb21ed84e9fd3ec40973abc8b32353',
+ 'branches': {
+ b'HEAD': None,
+ },
+ }
+
+ self.unresolved = {
+ 'id': 'ca56baf90b4fb52c0041764fdd98b77d89ef580d',
+ 'branches': {
+ b'foo': {
+ 'target': b'bar',
+ 'target_type': 'symbolic',
+ },
+ },
+ }
+
+ self.all_types = {
+ 'id': '175bab5e9f62248249e599af232d77f68e277965',
+ 'branches': {
+ b'directory': {
+ 'target': '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8',
+ 'target_type': 'directory',
+ },
+ b'content': {
+ 'target': 'fe95a46679d128ff167b7c55df5d02356c5a1ae1',
+ 'target_type': 'content',
+ },
+ b'symbolic': {
+ 'target': b'revision',
+ 'target_type': 'symbolic',
+ },
+ b'revision': {
+ 'target': 'aafb16d69fd30ff58afdd69036a26047f3aebdc6',
+ 'target_type': 'revision',
+ },
+ b'release': {
+ 'target': '7045404f3d1c54e6473c71bbb716529fbad4be24',
+ 'target_type': 'release',
+ },
+ b'snapshot': {
+ 'target': '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e',
+ 'target_type': 'snapshot',
+ },
+ b'dangling': None,
+ }
+ }
+
+ def test_empty_snapshot(self):
+ self.assertEqual(
+ identifiers.snapshot_identifier(self.empty),
+ identifiers.identifier_to_str(self.empty['id']),
+ )
+
+ def test_dangling_branch(self):
+ self.assertEqual(
+ identifiers.snapshot_identifier(self.dangling_branch),
+ identifiers.identifier_to_str(self.dangling_branch['id']),
+ )
+
+ def test_unresolved(self):
+ with self.assertRaisesRegex(ValueError, "b'foo' -> b'bar'"):
+ identifiers.snapshot_identifier(self.unresolved)
+
+ def test_unresolved_force(self):
+ self.assertEqual(
+ identifiers.snapshot_identifier(
+ self.unresolved,
+ ignore_unresolved=True,
+ ),
+ identifiers.identifier_to_str(self.unresolved['id']),
+ )
+
+ def test_all_types(self):
+ self.assertEqual(
+ identifiers.snapshot_identifier(self.all_types),
+ identifiers.identifier_to_str(self.all_types['id']),
+ )
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 4 2024, 6:35 PM (19 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219179
Attached To
D277: Support snapshot identifiers in swh.model
Event Timeline
Log In to Comment