diff --git a/docs/persistent-identifiers.rst b/docs/persistent-identifiers.rst
--- a/docs/persistent-identifiers.rst
+++ b/docs/persistent-identifiers.rst
@@ -42,7 +42,8 @@
   <identifier> ::= "swh" ":" <scheme_version> ":" <object_type> ":" <object_id> ;
   <scheme_version> ::= "1" ;
   <object_type> ::=
-      "snp"  (* snapshot *)
+      "ori"  (* origin *)
+    | "snp"  (* snapshot *)
     | "rel"  (* release *)
     | "rev"  (* revision *)
     | "dir"  (* directory *)
@@ -66,7 +67,8 @@
 A persistent identifier points to a single object, whose type is explicitly
 captured by ``<object_type>``:
 
-* ``snp`` identifiers points to **snapshots**,
+* ``ori`` identifiers point to **origins**,
+* ``snp`` to **snapshots**,
 * ``rel`` to **releases**,
 * ``rev`` to **revisions**,
 * ``dir`` to **directories**,
@@ -76,6 +78,9 @@
 ``<object_id>``, which is a hex-encoded (using lowercase ASCII characters) SHA1
 computed on the content and metadata of the object itself, as follows:
 
+* for **origins**, intrinsic identifiers are computed as per
+  :py:func:`swh.model.identifiers.origin_identifier`
+
 * for **snapshots**, intrinsic identifiers are computed as per
   :py:func:`swh.model.identifiers.snapshot_identifier`
 
@@ -128,6 +133,8 @@
   release 2.3.0, dated 24 December 2016
 * ``swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453`` points to a snapshot of
   the entire Darktable Git repository taken on 4 May 2017 from GitHub
+* ``swh:1:ori:b63a575fe3faab7692c9f38fb09d4bb45651bb0f`` points to the
+  repository https://github.com/torvalds/linux .
 
 
Contextual information
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -5,6 +5,7 @@
 
 import binascii
 import datetime
+import hashlib
 from collections import namedtuple
 from functools import lru_cache
 
@@ -14,6 +15,7 @@
 from .hashutil import hash_git_data, hash_to_hex, MultiHash
 
 
+ORIGIN = 'origin'
 SNAPSHOT = 'snapshot'
 REVISION = 'revision'
 RELEASE = 'release'
@@ -597,7 +599,30 @@
     return identifier_to_str(hash_git_data(b''.join(lines), 'snapshot'))
 
 
+def origin_identifier(origin):
+    """Return the intrinsic identifier for an origin.
+
+    The identifier is the hex-encoded SHA1 of the origin URL.
+
+    Args:
+        origin (dict): origin description; only its ``url`` key (a str)
+            is read.
+
+    Returns:
+        str: the SHA1 digest as a lowercase hexadecimal string.
+
+    """
+    # UTF-8 produces the same bytes as ASCII for ASCII-only URLs (so
+    # existing identifiers are unchanged) but does not raise
+    # UnicodeEncodeError on URLs containing non-ASCII characters.
+    return hashlib.sha1(origin['url'].encode('utf-8')).hexdigest()
+
+
 _object_type_map = {
+    ORIGIN: {
+        'short_name': 'ori',
+        'key_id': 'id'
+    },
     SNAPSHOT: {
         'short_name': 'snp',
         'key_id': 'id'
@@ -620,7 +645,7 @@
     }
 }
 
-PERSISTENT_IDENTIFIER_TYPES = ['snp', 'rel', 'rev', 'dir', 'cnt']
+PERSISTENT_IDENTIFIER_TYPES = ['ori', 'snp', 'rel', 'rev', 'dir', 'cnt']
 
 PERSISTENT_IDENTIFIER_KEYS = [
     'namespace', 'scheme_version', 'object_type', 'object_id', 'metadata']
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -893,3 +893,14 @@
             with self.assertRaisesRegex(
                     ValidationError, _error):
                 identifiers.parse_persistent_identifier(pid)
+
+
+class OriginIdentifier(unittest.TestCase):
+    def setUp(self):
+        self.origin = {
+            'url': 'https://github.com/torvalds/linux',
+        }
+
+    def test_origin_identifier(self):
+        self.assertEqual(identifiers.origin_identifier(self.origin),
+                         'b63a575fe3faab7692c9f38fb09d4bb45651bb0f')