Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124826
D6330.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
53 KB
Subscribers
None
D6330.diff
View Options
diff --git a/swh/model/cli.py b/swh/model/cli.py
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -26,7 +26,7 @@
swh_cli_group = click # type: ignore
from swh.model.from_disk import Directory
-from swh.model.identifiers import CoreSWHID, ObjectType
+from swh.model.swhids import CoreSWHID
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
@@ -42,7 +42,7 @@
class CoreSWHIDParamType(click.ParamType):
"""Click argument that accepts a core SWHID and returns them as
- :class:`swh.model.identifiers.CoreSWHID` instances """
+ :class:`swh.model.swhids.CoreSWHID` instances """
name = "SWHID"
@@ -87,17 +87,9 @@
def swhid_of_origin(url):
- from swh.model.hashutil import hash_to_bytes
- from swh.model.identifiers import (
- ExtendedObjectType,
- ExtendedSWHID,
- origin_identifier,
- )
+ from swh.model.model import Origin
- return ExtendedSWHID(
- object_type=ExtendedObjectType.ORIGIN,
- object_id=hash_to_bytes(origin_identifier({"url": url})),
- )
+ return Origin(url).swhid()
def swhid_of_git_repo(path) -> CoreSWHID:
@@ -110,7 +102,7 @@
)
from swh.model import hashutil
- from swh.model.identifiers import snapshot_identifier
+ from swh.model.model import Snapshot
repo = dulwich.repo.Repo(path)
@@ -133,10 +125,7 @@
snapshot = {"branches": branches}
- return CoreSWHID(
- object_type=ObjectType.SNAPSHOT,
- object_id=hashutil.hash_to_bytes(snapshot_identifier(snapshot)),
- )
+ return Snapshot.from_dict(snapshot).swhid()
def identify_object(
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -18,14 +18,10 @@
from . import model
from .exceptions import InvalidDirectoryPath
-from .hashutil import MultiHash, hash_to_bytes, hash_to_hex
-from .identifiers import (
- CoreSWHID,
- ObjectType,
- directory_entry_sort_key,
- directory_identifier,
-)
+from .git_objects import directory_entry_sort_key
+from .hashutil import MultiHash, hash_to_hex
from .merkle import MerkleLeaf, MerkleNode
+from .swhids import CoreSWHID, ObjectType
@attr.s(frozen=True, slots=True)
@@ -477,8 +473,8 @@
@property
def entries(self):
- """Child nodes, sorted by name in the same way `directory_identifier`
- does."""
+ """Child nodes, sorted by name in the same way
+ :func:`swh.model.git_objects.directory_git_object` does."""
if self.__entries is None:
self.__entries = sorted(
(
@@ -496,7 +492,7 @@
return CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=self.hash)
def compute_hash(self):
- return hash_to_bytes(directory_identifier({"entries": self.entries}))
+ return model.Directory.from_dict({"entries": self.entries}).id
def to_model(self) -> model.Directory:
"""Builds a `model.Directory` object based on this node;
diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py
--- a/swh/model/git_objects.py
+++ b/swh/model/git_objects.py
@@ -27,7 +27,7 @@
@lru_cache()
def _perms_to_bytes(perms):
- """Convert the perms value to its bytes representation"""
+ """Convert the perms value to its canonical bytes representation"""
oc = oct(perms)[2:]
return oc.encode("ascii")
@@ -117,7 +117,6 @@
UTC
- negative_utc: a boolean representing whether the offset is -0000
when offset = 0.
-
"""
if time_representation is None:
return None
@@ -126,6 +125,41 @@
def directory_git_object(directory: model.Directory) -> bytes:
+ """Formats a directory as a git tree.
+
+ A directory's identifier is the tree sha1 à la git of a directory listing,
+ using the following algorithm, which is equivalent to the git algorithm for
+ trees:
+
+ 1. Entries of the directory are sorted using the name (or the name with '/'
+ appended for directory entries) as key, in bytes order.
+
+ 2. For each entry of the directory, the following bytes are output:
+
+ - the octal representation of the permissions for the entry (stored in
+ the 'perms' member), which is a representation of the entry type:
+
+ - b'100644' (int 33188) for files
+ - b'100755' (int 33261) for executable files
+ - b'120000' (int 40960) for symbolic links
+ - b'40000' (int 16384) for directories
+ - b'160000' (int 57344) for references to revisions
+
+ - an ascii space (b'\x20')
+ - the entry's name (as raw bytes), stored in the 'name' member
+ - a null byte (b'\x00')
+ - the 20 byte long identifier of the object pointed at by the entry,
+ stored in the 'target' member:
+
+ - for files or executable files: their blob sha1_git
+ - for symbolic links: the blob sha1_git of a file containing the link
+ destination
+ - for directories: their intrinsic identifier
+ - for revisions: their intrinsic identifier
+
+ (Note that there is no separator between entries)
+
+ """
if isinstance(directory, dict):
# For backward compatibility
directory = model.Directory.from_dict(directory)
@@ -219,7 +253,6 @@
Returns:
the byte string containing the authorship data
-
"""
ret = [author.fullname]
@@ -234,8 +267,55 @@
def revision_git_object(revision: model.Revision) -> bytes:
- """Formats the git_object of a revision. See :func:`revision_identifier` for details
- on the format."""
+ """Formats a revision as a git commit.
+
+ The fields used for the revision identifier computation are:
+
+ - directory
+ - parents
+ - author
+ - author_date
+ - committer
+ - committer_date
+ - extra_headers or metadata -> extra_headers
+ - message
+
+ A revision's identifier is the 'git'-checksum of a commit manifest
+ constructed as follows (newlines are a single ASCII newline character)::
+
+ tree <directory identifier>
+ [for each parent in parents]
+ parent <parent identifier>
+ [end for each parents]
+ author <author> <author_date>
+ committer <committer> <committer_date>
+ [for each key, value in extra_headers]
+ <key> <encoded value>
+ [end for each extra_headers]
+
+ <message>
+
+ The directory identifier is the ascii representation of its hexadecimal
+ encoding.
+
+ Author and committer are formatted using the :attr:`Person.fullname` attribute only.
+ Dates are formatted with the :func:`format_offset` function.
+
+ Extra headers are an ordered list of [key, value] pairs. Keys are strings
+ and get encoded to utf-8 for identifier computation. Values are either byte
+ strings, unicode strings (that get encoded to utf-8), or integers (that get
+ encoded to their utf-8 decimal representation).
+
+ Multiline extra header values are escaped by indenting the continuation
+ lines with one ascii space.
+
+ If the message is None, the manifest ends with the last header. Else, the
+ message is appended to the headers after an empty line.
+
+ The checksum of the full manifest is computed using the 'commit' git object
+ type.
+
+ """
if isinstance(revision, dict):
# For backward compatibility
revision = model.Revision.from_dict(revision)
@@ -290,8 +370,50 @@
def snapshot_git_object(snapshot: model.Snapshot) -> bytes:
- """Formats the git_object of a revision. See :func:`snapshot_identifier` for details
- on the format."""
+ """Formats a snapshot as a git-like object.
+
+ Snapshots are a set of named branches, which are pointers to objects at any
+ level of the Software Heritage DAG.
+
+ As well as pointing to other objects in the Software Heritage DAG, branches
+ can also be *alias*es, in which case their target is the name of another
+ branch in the same snapshot, or *dangling*, in which case the target is
+ unknown (and represented by the ``None`` value).
+
+ A snapshot identifier is a salted sha1 (using the git hashing algorithm
+ with the ``snapshot`` object type) of a manifest following the algorithm:
+
+ 1. Branches are sorted using the name as key, in bytes order.
+
+ 2. For each branch, the following bytes are output:
+
+ - the type of the branch target:
+
+ - ``content``, ``directory``, ``revision``, ``release`` or ``snapshot``
+ for the corresponding entries in the DAG;
+ - ``alias`` for branches referencing another branch;
+ - ``dangling`` for dangling branches
+
+ - an ascii space (``\\x20``)
+ - the branch name (as raw bytes)
+ - a null byte (``\\x00``)
+ - the length of the target identifier, as an ascii-encoded decimal number
+ (``20`` for current intrinsic identifiers, ``0`` for dangling
+ branches, the length of the target branch name for branch aliases)
+ - a colon (``:``)
+ - the identifier of the target object pointed at by the branch,
+ stored in the 'target' member:
+
+ - for contents: their *sha1_git*
+ - for directories, revisions, releases or snapshots: their intrinsic
+ identifier
+ - for branch aliases, the name of the target branch (as raw bytes)
+ - for dangling branches, the empty string
+
+ Note that, akin to directory manifests, there is no separator between
+ entries. Because of symbolic branches, identifiers are of arbitrary
+ length but are length-encoded to avoid ambiguity.
+ """
if isinstance(snapshot, dict):
# For backward compatibility
snapshot = model.Snapshot.from_dict(snapshot)
@@ -334,9 +456,47 @@
def raw_extrinsic_metadata_git_object(metadata: model.RawExtrinsicMetadata) -> bytes:
- """Formats the git_object of a raw_extrinsic_metadata object.
- See :func:`raw_extrinsic_metadata_identifier` for details
- on the format."""
+ """Formats RawExtrinsicMetadata as a git-like object.
+
+ A raw_extrinsic_metadata identifier is a salted sha1 (using the git
+ hashing algorithm with the ``raw_extrinsic_metadata`` object type) of
+ a manifest following the format::
+
+ target $ExtendedSwhid
+ discovery_date $Timestamp
+ authority $StrWithoutSpaces $IRI
+ fetcher $Str $Version
+ format $StrWithoutSpaces
+ origin $IRI <- optional
+ visit $IntInDecimal <- optional
+ snapshot $CoreSwhid <- optional
+ release $CoreSwhid <- optional
+ revision $CoreSwhid <- optional
+ path $Bytes <- optional
+ directory $CoreSwhid <- optional
+
+ $MetadataBytes
+
+ $IRI must be RFC 3987 IRIs (so they may contain newlines, that are escaped as
+ described below)
+
+ $StrWithoutSpaces and $Version are ASCII strings, and may not contain spaces.
+
+ $Str is a UTF-8 string.
+
+ $CoreSwhid are core SWHIDs, as defined in :ref:`persistent-identifiers`.
+ $ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
+ origins and 'emd' for raw extrinsic metadata)
+
+ $Timestamp is a decimal representation of the rounded-down integer number of
+ seconds since the UNIX epoch (1970-01-01 00:00:00 UTC),
+ with no leading '0' (unless the timestamp value is zero) and no timezone.
+ It may be negative by prefixing it with a '-', which must not be followed
+ by a '0'.
+
+ Newlines in $Bytes, $Str, and $IRI are escaped as with other git fields,
+ ie. by adding a space after them.
+ """
if isinstance(metadata, dict):
# For backward compatibility
metadata = model.RawExtrinsicMetadata.from_dict(metadata)
@@ -389,6 +549,26 @@
def extid_git_object(extid: model.ExtID) -> bytes:
+ """Formats an extid as a git-like object.
+
+ An ExtID identifier is a salted sha1 (using the git hashing algorithm with
+ the ``extid`` object type) of a manifest following the format:
+
+ ```
+ extid_type $StrWithoutSpaces
+ [extid_version $Str]
+ extid $Bytes
+ target $CoreSwhid
+ ```
+
+ $StrWithoutSpaces is an ASCII string, and may not contain spaces.
+
+ Newlines in $Bytes are escaped as with other git fields, ie. by adding a
+ space after them.
+
+ The extid_version line is only generated if the version is non-zero.
+ """
+
headers = [
(b"extid_type", extid.extid_type.encode("ascii")),
]
diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py
--- a/swh/model/hypothesis_strategies.py
+++ b/swh/model/hypothesis_strategies.py
@@ -29,8 +29,6 @@
)
from .from_disk import DentryPerms
-from .hashutil import hash_to_bytes
-from .identifiers import ExtendedObjectType, ExtendedSWHID, snapshot_identifier
from .model import (
BaseContent,
Content,
@@ -54,6 +52,7 @@
Timestamp,
TimestampWithTimezone,
)
+from .swhids import ExtendedObjectType, ExtendedSWHID
pgsql_alphabet = characters(
blacklist_categories=("Cs",), blacklist_characters=["\u0000"]
@@ -396,7 +395,7 @@
# Ensure no cycles between aliases
while True:
try:
- id_ = snapshot_identifier(
+ snapshot = Snapshot.from_dict(
{
"branches": {
name: branch or None for (name, branch) in branches.items()
@@ -409,7 +408,7 @@
else:
break
- return dict(id=hash_to_bytes(id_), branches=branches)
+ return snapshot.to_dict()
def snapshots(*, min_size=0, max_size=100, only_objects=False):
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -3,9 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from __future__ import annotations
-
from typing import Any, Dict
+import warnings
from . import model
@@ -16,6 +15,14 @@
# Reexport for backward compatibility
from .swhids import * # noqa
+warnings.warn(
+ "The swh.model.identifiers module is deprecated. "
+ "SWHID-related classes were moved to swh.model.swhids, and identifier "
+ "computation is now done directly with swh.model.model classes.",
+ DeprecationWarning,
+ stacklevel=2,
+)
+
# The following are deprecated aliases of the variants defined in ObjectType
# while transitioning from SWHID to QualifiedSWHID
ORIGIN = "origin"
@@ -28,260 +35,57 @@
def content_identifier(content: Dict[str, Any]) -> Dict[str, bytes]:
- """Return the intrinsic identifier for a content.
-
- A content's identifier is the sha1, sha1_git and sha256 checksums of its
- data.
-
- Args:
- content: a content conforming to the Software Heritage schema
-
- Returns:
- A dictionary with all the hashes for the data
-
- Raises:
- KeyError: if the content doesn't have a data member.
-
+ """Deprecated, use :class:`swh.model.model.Content` instead:
+ ``content_identifier(d)`` is equivalent to:
+ ``Content.from_data(d["data"]).hashes()``
"""
-
return MultiHash.from_data(content["data"]).digest()
def directory_identifier(directory: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a directory.
-
- A directory's identifier is the tree sha1 à la git of a directory listing,
- using the following algorithm, which is equivalent to the git algorithm for
- trees:
+ """Deprecated, use :class:`swh.model.model.Directory` instead:
+ ``directory_identifier(d)`` is equivalent to:
+ ``hash_to_hex(Directory.from_dict(d).id)``.
- 1. Entries of the directory are sorted using the name (or the name with '/'
- appended for directory entries) as key, in bytes order.
-
- 2. For each entry of the directory, the following bytes are output:
-
- - the octal representation of the permissions for the entry (stored in
- the 'perms' member), which is a representation of the entry type:
-
- - b'100644' (int 33188) for files
- - b'100755' (int 33261) for executable files
- - b'120000' (int 40960) for symbolic links
- - b'40000' (int 16384) for directories
- - b'160000' (int 57344) for references to revisions
-
- - an ascii space (b'\x20')
- - the entry's name (as raw bytes), stored in the 'name' member
- - a null byte (b'\x00')
- - the 20 byte long identifier of the object pointed at by the entry,
- stored in the 'target' member:
-
- - for files or executable files: their blob sha1_git
- - for symbolic links: the blob sha1_git of a file containing the link
- destination
- - for directories: their intrinsic identifier
- - for revisions: their intrinsic identifier
-
- (Note that there is no separator between entries)
-
- """
+ See :func:`swh.model.git_objects.directory_git_object` for details of the
+ format used to generate this identifier."""
return hash_to_hex(model.Directory.from_dict(directory).id)
def revision_identifier(revision: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a revision.
-
- The fields used for the revision identifier computation are:
+ """Deprecated, use :class:`swh.model.model.Revision` instead:
+ ``revision_identifier(d)`` is equivalent to:
+ ``hash_to_hex(Revision.from_dict(d).id)``.
- - directory
- - parents
- - author
- - author_date
- - committer
- - committer_date
- - extra_headers or metadata -> extra_headers
- - message
-
- A revision's identifier is the 'git'-checksum of a commit manifest
- constructed as follows (newlines are a single ASCII newline character)::
-
- tree <directory identifier>
- [for each parent in parents]
- parent <parent identifier>
- [end for each parents]
- author <author> <author_date>
- committer <committer> <committer_date>
- [for each key, value in extra_headers]
- <key> <encoded value>
- [end for each extra_headers]
-
- <message>
-
- The directory identifier is the ascii representation of its hexadecimal
- encoding.
-
- Author and committer are formatted using the :attr:`Person.fullname` attribute only.
- Dates are formatted with the :func:`format_offset` function.
-
- Extra headers are an ordered list of [key, value] pairs. Keys are strings
- and get encoded to utf-8 for identifier computation. Values are either byte
- strings, unicode strings (that get encoded to utf-8), or integers (that get
- encoded to their utf-8 decimal representation).
-
- Multiline extra header values are escaped by indenting the continuation
- lines with one ascii space.
-
- If the message is None, the manifest ends with the last header. Else, the
- message is appended to the headers after an empty line.
-
- The checksum of the full manifest is computed using the 'commit' git object
- type.
-
- """
+ See :func:`swh.model.git_objects.revision_git_object` for details of the
+ format used to generate this identifier."""
return hash_to_hex(model.Revision.from_dict(revision).id)
def release_identifier(release: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a release."""
+ """Deprecated, use :class:`swh.model.model.Release` instead:
+ ``release_identifier(d)`` is equivalent to:
+ ``hash_to_hex(Release.from_dict(d).id)``.
+
+ See :func:`swh.model.git_objects.release_git_object` for details of the
+ format used to generate this identifier."""
return hash_to_hex(model.Release.from_dict(release).id)
def snapshot_identifier(snapshot: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a snapshot.
-
- Snapshots are a set of named branches, which are pointers to objects at any
- level of the Software Heritage DAG.
-
- As well as pointing to other objects in the Software Heritage DAG, branches
- can also be *alias*es, in which case their target is the name of another
- branch in the same snapshot, or *dangling*, in which case the target is
- unknown (and represented by the ``None`` value).
-
- A snapshot identifier is a salted sha1 (using the git hashing algorithm
- with the ``snapshot`` object type) of a manifest following the algorithm:
-
- 1. Branches are sorted using the name as key, in bytes order.
-
- 2. For each branch, the following bytes are output:
-
- - the type of the branch target:
+ """Deprecated, use :class:`swh.model.model.Snapshot` instead:
+ ``snapshot_identifier(d)`` is equivalent to:
+ ``hash_to_hex(Snapshot.from_dict(d).id)``.
- - ``content``, ``directory``, ``revision``, ``release`` or ``snapshot``
- for the corresponding entries in the DAG;
- - ``alias`` for branches referencing another branch;
- - ``dangling`` for dangling branches
-
- - an ascii space (``\\x20``)
- - the branch name (as raw bytes)
- - a null byte (``\\x00``)
- - the length of the target identifier, as an ascii-encoded decimal number
- (``20`` for current intrinsic identifiers, ``0`` for dangling
- branches, the length of the target branch name for branch aliases)
- - a colon (``:``)
- - the identifier of the target object pointed at by the branch,
- stored in the 'target' member:
-
- - for contents: their *sha1_git*
- - for directories, revisions, releases or snapshots: their intrinsic
- identifier
- - for branch aliases, the name of the target branch (as raw bytes)
- - for dangling branches, the empty string
-
- Note that, akin to directory manifests, there is no separator between
- entries. Because of symbolic branches, identifiers are of arbitrary
- length but are length-encoded to avoid ambiguity.
-
- Args:
- snapshot (dict): the snapshot of which to compute the identifier. A
- single entry is needed, ``'branches'``, which is itself a :class:`dict`
- mapping each branch to its target
-
- Returns:
- str: the intrinsic identifier for `snapshot`
-
- """
+ See :func:`swh.model.git_objects.snapshot_git_object` for details of the
+ format used to generate this identifier."""
return hash_to_hex(model.Snapshot.from_dict(snapshot).id)
def origin_identifier(origin):
- """Return the intrinsic identifier for an origin.
-
- An origin's identifier is the sha1 checksum of the entire origin URL
-
+ """Deprecated, use :class:`swh.model.model.Origin` instead:
+ ``origin_identifier({"url": url})`` is equivalent to:
+ ``hash_to_hex(Origin(url=url).id)``.
"""
- return hash_to_hex(model.Origin.from_dict(origin).id)
-
-
-def raw_extrinsic_metadata_identifier(metadata: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a RawExtrinsicMetadata object.
-
- A raw_extrinsic_metadata identifier is a salted sha1 (using the git
- hashing algorithm with the ``raw_extrinsic_metadata`` object type) of
- a manifest following the format::
-
- target $ExtendedSwhid
- discovery_date $Timestamp
- authority $StrWithoutSpaces $IRI
- fetcher $Str $Version
- format $StrWithoutSpaces
- origin $IRI <- optional
- visit $IntInDecimal <- optional
- snapshot $CoreSwhid <- optional
- release $CoreSwhid <- optional
- revision $CoreSwhid <- optional
- path $Bytes <- optional
- directory $CoreSwhid <- optional
-
- $MetadataBytes
-
- $IRI must be RFC 3987 IRIs (so they may contain newlines, that are escaped as
- described below)
-
- $StrWithoutSpaces and $Version are ASCII strings, and may not contain spaces.
-
- $Str is an UTF-8 string.
- $CoreSwhid are core SWHIDs, as defined in :ref:`persistent-identifiers`.
- $ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
- origins and 'emd' for raw extrinsic metadata)
-
- $Timestamp is a decimal representation of the rounded-down integer number of
- seconds since the UNIX epoch (1970-01-01 00:00:00 UTC),
- with no leading '0' (unless the timestamp value is zero) and no timezone.
- It may be negative by prefixing it with a '-', which must not be followed
- by a '0'.
-
- Newlines in $Bytes, $Str, and $Iri are escaped as with other git fields,
- ie. by adding a space after them.
-
- Returns:
- str: the intrinsic identifier for ``metadata``
-
- """
- return hash_to_hex(model.RawExtrinsicMetadata.from_dict(metadata).id)
-
-
-def extid_identifier(extid: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for an ExtID object.
-
- An ExtID identifier is a salted sha1 (using the git hashing algorithm with
- the ``extid`` object type) of a manifest following the format:
-
- ```
- extid_type $StrWithoutSpaces
- [extid_version $Str]
- extid $Bytes
- target $CoreSwhid
- ```
-
- $StrWithoutSpaces is an ASCII string, and may not contain spaces.
-
- Newlines in $Bytes are escaped as with other git fields, ie. by adding a
- space after them.
-
- The extid_version line is only generated if the version is non-zero.
-
- Returns:
- str: the intrinsic identifier for `extid`
-
- """
-
- return hash_to_hex(model.ExtID.from_dict(extid).id)
+ return hash_to_hex(model.Origin.from_dict(origin).id)
diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
--- a/swh/model/tests/swh_model_data.py
+++ b/swh/model/tests/swh_model_data.py
@@ -9,7 +9,6 @@
import attr
from swh.model.hashutil import MultiHash, hash_to_bytes
-from swh.model.identifiers import ExtendedSWHID
from swh.model.model import (
BaseModel,
Content,
@@ -35,6 +34,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhids import ExtendedSWHID
UTC = datetime.timezone.utc
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -12,18 +12,27 @@
import attr
import pytest
-from swh.model import hashutil, identifiers
+from swh.model import git_objects, hashutil
from swh.model.exceptions import ValidationError
from swh.model.hashutil import hash_to_bytes as _x
-from swh.model.hashutil import hash_to_hex
-from swh.model.identifiers import (
+from swh.model.model import (
+ Content,
+ Directory,
+ ExtID,
+ Origin,
+ RawExtrinsicMetadata,
+ Release,
+ Revision,
+ Snapshot,
+ TimestampWithTimezone,
+)
+from swh.model.swhids import (
SWHID_QUALIFIERS,
CoreSWHID,
ExtendedObjectType,
ExtendedSWHID,
ObjectType,
QualifiedSWHID,
- normalize_timestamp,
)
@@ -51,11 +60,11 @@
def test_format_date(self):
for date_repr, date in self.dates.items():
- self.assertEqual(identifiers.format_date(date), date_repr)
+ self.assertEqual(git_objects.format_date(date), date_repr)
def test_format_offset(self):
for offset, res in self.offsets.items():
- self.assertEqual(identifiers.format_offset(offset), res)
+ self.assertEqual(git_objects.format_offset(offset), res)
content_example = {
@@ -72,7 +81,7 @@
def test_content_identifier(self):
self.assertEqual(
- identifiers.content_identifier(content_example), self.content_id
+ Content.from_data(content_example["data"]).hashes(), self.content_id
)
@@ -199,26 +208,22 @@
}
def test_dir_identifier(self):
+ self.assertEqual(Directory.from_dict(self.directory).id, self.directory["id"])
self.assertEqual(
- _x(identifiers.directory_identifier(self.directory)), self.directory["id"]
- )
- self.assertEqual(
- _x(identifiers.directory_identifier(remove_id(self.directory))),
- self.directory["id"],
+ Directory.from_dict(remove_id(self.directory)).id, self.directory["id"],
)
def test_dir_identifier_entry_order(self):
# Reverse order of entries, check the id is still the same.
directory = {"entries": reversed(self.directory["entries"])}
self.assertEqual(
- _x(identifiers.directory_identifier(remove_id(directory))),
- self.directory["id"],
+ Directory.from_dict(remove_id(directory)).id, self.directory["id"],
)
def test_dir_identifier_empty_directory(self):
self.assertEqual(
- identifiers.directory_identifier(remove_id(self.empty_directory)),
- self.empty_directory["id"],
+ Directory.from_dict(remove_id(self.empty_directory)).id,
+ _x(self.empty_directory["id"]),
)
@@ -270,7 +275,7 @@
self.revision = revision_example
self.revision_none_metadata = {
- "id": "bc0195aad0daa2ad5b0d76cce22b167bc3435590",
+ "id": _x("bc0195aad0daa2ad5b0d76cce22b167bc3435590"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {
@@ -328,7 +333,7 @@
# cat commit.txt | git hash-object -t commit --stdin
self.revision_with_extra_headers = {
- "id": "010d34f384fa99d047cdd5e2f41e56e5c2feee45",
+ "id": _x("010d34f384fa99d047cdd5e2f41e56e5c2feee45"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {
@@ -355,7 +360,7 @@
}
self.revision_with_gpgsig = {
- "id": "44cc742a8ca17b9c279be4cc195a93a6ef7a320e",
+ "id": _x("44cc742a8ca17b9c279be4cc195a93a6ef7a320e"),
"directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
"parents": [
_x("689664ae944b4692724f13b709a4e4de28b54e57"),
@@ -380,7 +385,7 @@
}
self.revision_no_message = {
- "id": "4cfc623c9238fa92c832beed000ce2d003fd8333",
+ "id": _x("4cfc623c9238fa92c832beed000ce2d003fd8333"),
"directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
"parents": [
_x("689664ae944b4692724f13b709a4e4de28b54e57"),
@@ -400,7 +405,7 @@
}
self.revision_empty_message = {
- "id": "7442cd78bd3b4966921d6a7f7447417b7acb15eb",
+ "id": _x("7442cd78bd3b4966921d6a7f7447417b7acb15eb"),
"directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
"parents": [
_x("689664ae944b4692724f13b709a4e4de28b54e57"),
@@ -420,7 +425,7 @@
}
self.revision_only_fullname = {
- "id": "010d34f384fa99d047cdd5e2f41e56e5c2feee45",
+ "id": _x("010d34f384fa99d047cdd5e2f41e56e5c2feee45"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",},
@@ -442,56 +447,52 @@
def test_revision_identifier(self):
self.assertEqual(
- identifiers.revision_identifier(self.revision),
- hash_to_hex(self.revision["id"]),
+ Revision.from_dict(self.revision).id, self.revision["id"],
)
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision)),
- hash_to_hex(self.revision["id"]),
+ Revision.from_dict(remove_id(self.revision)).id, self.revision["id"],
)
def test_revision_identifier_none_metadata(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_none_metadata)),
- hash_to_hex(self.revision_none_metadata["id"]),
+ Revision.from_dict(remove_id(self.revision_none_metadata)).id,
+ self.revision_none_metadata["id"],
)
def test_revision_identifier_synthetic(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.synthetic_revision)),
- hash_to_hex(self.synthetic_revision["id"]),
+ Revision.from_dict(remove_id(self.synthetic_revision)).id,
+ self.synthetic_revision["id"],
)
def test_revision_identifier_with_extra_headers(self):
self.assertEqual(
- identifiers.revision_identifier(
- remove_id(self.revision_with_extra_headers)
- ),
- hash_to_hex(self.revision_with_extra_headers["id"]),
+ Revision.from_dict(remove_id(self.revision_with_extra_headers)).id,
+ self.revision_with_extra_headers["id"],
)
def test_revision_identifier_with_gpgsig(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_with_gpgsig)),
- hash_to_hex(self.revision_with_gpgsig["id"]),
+ Revision.from_dict(remove_id(self.revision_with_gpgsig)).id,
+ self.revision_with_gpgsig["id"],
)
def test_revision_identifier_no_message(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_no_message)),
- hash_to_hex(self.revision_no_message["id"]),
+ Revision.from_dict(remove_id(self.revision_no_message)).id,
+ self.revision_no_message["id"],
)
def test_revision_identifier_empty_message(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_empty_message)),
- hash_to_hex(self.revision_empty_message["id"]),
+ Revision.from_dict(remove_id(self.revision_empty_message)).id,
+ self.revision_empty_message["id"],
)
def test_revision_identifier_only_fullname(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_only_fullname)),
- hash_to_hex(self.revision_only_fullname["id"]),
+ Revision.from_dict(remove_id(self.revision_only_fullname)).id,
+ self.revision_only_fullname["id"],
)
@@ -608,48 +609,46 @@
def test_release_identifier(self):
self.assertEqual(
- identifiers.release_identifier(self.release),
- hash_to_hex(self.release["id"]),
+ Release.from_dict(self.release).id, self.release["id"],
)
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release)),
- hash_to_hex(self.release["id"]),
+ Release.from_dict(remove_id(self.release)).id, self.release["id"],
)
def test_release_identifier_no_author(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_no_author)),
- hash_to_hex(self.release_no_author["id"]),
+ Release.from_dict(remove_id(self.release_no_author)).id,
+ self.release_no_author["id"],
)
def test_release_identifier_no_message(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_no_message)),
- hash_to_hex(self.release_no_message["id"]),
+ Release.from_dict(remove_id(self.release_no_message)).id,
+ self.release_no_message["id"],
)
def test_release_identifier_empty_message(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_empty_message)),
- hash_to_hex(self.release_empty_message["id"]),
+ Release.from_dict(remove_id(self.release_empty_message)).id,
+ self.release_empty_message["id"],
)
def test_release_identifier_negative_utc(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_negative_utc)),
- hash_to_hex(self.release_negative_utc["id"]),
+ Release.from_dict(remove_id(self.release_negative_utc)).id,
+ self.release_negative_utc["id"],
)
def test_release_identifier_newline_in_author(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_newline_in_author)),
- hash_to_hex(self.release_newline_in_author["id"]),
+ Release.from_dict(remove_id(self.release_newline_in_author)).id,
+ self.release_newline_in_author["id"],
)
def test_release_identifier_snapshot_target(self):
self.assertEqual(
- identifiers.release_identifier(self.release_snapshot_target),
- hash_to_hex(self.release_snapshot_target["id"]),
+ Release.from_dict(self.release_snapshot_target).id,
+ self.release_snapshot_target["id"],
)
@@ -687,17 +686,17 @@
super().setUp()
self.empty = {
- "id": "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e",
+ "id": _x("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
"branches": {},
}
self.dangling_branch = {
- "id": "c84502e821eb21ed84e9fd3ec40973abc8b32353",
+ "id": _x("c84502e821eb21ed84e9fd3ec40973abc8b32353"),
"branches": {b"HEAD": None,},
}
self.unresolved = {
- "id": "84b4548ea486e4b0a7933fa541ff1503a0afe1e0",
+ "id": _x("84b4548ea486e4b0a7933fa541ff1503a0afe1e0"),
"branches": {b"foo": {"target": b"bar", "target_type": "alias",},},
}
@@ -705,24 +704,22 @@
def test_empty_snapshot(self):
self.assertEqual(
- identifiers.snapshot_identifier(remove_id(self.empty)),
- hash_to_hex(self.empty["id"]),
+ Snapshot.from_dict(remove_id(self.empty)).id, self.empty["id"],
)
def test_dangling_branch(self):
self.assertEqual(
- identifiers.snapshot_identifier(remove_id(self.dangling_branch)),
- hash_to_hex(self.dangling_branch["id"]),
+ Snapshot.from_dict(remove_id(self.dangling_branch)).id,
+ self.dangling_branch["id"],
)
def test_unresolved(self):
with self.assertRaisesRegex(ValueError, "b'foo' -> b'bar'"):
- identifiers.snapshot_identifier(remove_id(self.unresolved))
+ Snapshot.from_dict(remove_id(self.unresolved))
def test_all_types(self):
self.assertEqual(
- identifiers.snapshot_identifier(remove_id(self.all_types)),
- hash_to_hex(self.all_types["id"]),
+ Snapshot.from_dict(remove_id(self.all_types)).id, self.all_types["id"],
)
@@ -775,15 +772,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.minimal), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.minimal)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_maximal(self):
@@ -806,15 +806,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.maximal), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.maximal)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.maximal),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(self.maximal).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.maximal),
- "f96966e1093d15236a31fde07e47d5b1c9428049",
+ RawExtrinsicMetadata.from_dict(self.maximal).id,
+ _x("f96966e1093d15236a31fde07e47d5b1c9428049"),
)
def test_nonascii_path(self):
@@ -836,15 +839,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "7cc83fd1912176510c083f5df43f01b09af4b333",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("7cc83fd1912176510c083f5df43f01b09af4b333"),
)
def test_timezone_insensitive(self):
@@ -859,16 +865,20 @@
}
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.minimal),
- identifiers.raw_extrinsic_metadata_git_object(metadata),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.minimal)
+ ),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- identifiers.raw_extrinsic_metadata_identifier(metadata),
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ RawExtrinsicMetadata.from_dict(metadata).id,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_microsecond_insensitive(self):
@@ -882,16 +892,20 @@
}
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.minimal),
- identifiers.raw_extrinsic_metadata_git_object(metadata),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.minimal)
+ ),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- identifiers.raw_extrinsic_metadata_identifier(metadata),
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ RawExtrinsicMetadata.from_dict(metadata).id,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_noninteger_timezone(self):
@@ -906,16 +920,20 @@
}
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.minimal),
- identifiers.raw_extrinsic_metadata_git_object(metadata),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.minimal)
+ ),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- identifiers.raw_extrinsic_metadata_identifier(metadata),
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ RawExtrinsicMetadata.from_dict(metadata).id,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_negative_timestamp(self):
@@ -938,15 +956,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "895d0821a2991dd376ddc303424aceb7c68280f9",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("895d0821a2991dd376ddc303424aceb7c68280f9"),
)
def test_epoch(self):
@@ -969,15 +990,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "27a53df54ace35ebd910493cdc70b334d6b7cb88",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("27a53df54ace35ebd910493cdc70b334d6b7cb88"),
)
def test_negative_epoch(self):
@@ -1000,15 +1024,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "be7154a8fd49d87f81547ea634d1e2152907d089",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("be7154a8fd49d87f81547ea634d1e2152907d089"),
)
@@ -1020,8 +1047,8 @@
class OriginIdentifier(unittest.TestCase):
def test_content_identifier(self):
self.assertEqual(
- identifiers.origin_identifier(origin_example),
- "b63a575fe3faab7692c9f38fb09d4bb45651bb0f",
+ Origin.from_dict(origin_example).id,
+ _x("b63a575fe3faab7692c9f38fb09d4bb45651bb0f"),
)
@@ -1111,7 +1138,7 @@
@pytest.mark.parametrize("dict_input,expected", TS_DICTS)
def test_normalize_timestamp_dict(dict_input, expected):
- assert normalize_timestamp(dict_input) == expected
+ assert TimestampWithTimezone.from_dict(dict_input).to_dict() == expected
TS_DICTS_INVALID_TIMESTAMP = [
@@ -1127,7 +1154,7 @@
@pytest.mark.parametrize("dict_input", TS_DICTS_INVALID_TIMESTAMP)
def test_normalize_timestamp_dict_invalid_timestamp(dict_input):
with pytest.raises(ValueError, match="non-integer timestamp"):
- normalize_timestamp(dict_input)
+ TimestampWithTimezone.from_dict(dict_input)
UTC = datetime.timezone.utc
@@ -1152,7 +1179,7 @@
@pytest.mark.parametrize("microsecond", [0, 1, 10, 100, 1000, 999999])
def test_normalize_timestamp_datetime(date, seconds, tz, offset, microsecond):
date = date.astimezone(tz).replace(microsecond=microsecond)
- assert normalize_timestamp(date) == {
+ assert TimestampWithTimezone.from_dict(date).to_dict() == {
"timestamp": {"seconds": seconds, "microseconds": microsecond},
"offset": offset,
"negative_utc": False,
@@ -1776,18 +1803,19 @@
"target": "swh:1:dir:" + "00" * 20,
}
- assert (
- identifiers.extid_identifier(extid_dict)
- == "b9295e1931c31e40a7e3e1e967decd1c89426455"
+ assert ExtID.from_dict(extid_dict).id == _x(
+ "b9295e1931c31e40a7e3e1e967decd1c89426455"
)
- assert identifiers.extid_identifier(
- {**extid_dict, "extid_version": 0}
- ) == identifiers.extid_identifier(extid_dict)
+ assert (
+ ExtID.from_dict({**extid_dict, "extid_version": 0}).id
+ == ExtID.from_dict(extid_dict).id
+ )
- assert identifiers.extid_identifier(
- {**extid_dict, "extid_version": 1}
- ) != identifiers.extid_identifier(extid_dict)
+ assert (
+ ExtID.from_dict({**extid_dict, "extid_version": 1}).id
+ != ExtID.from_dict(extid_dict).id
+ )
def test_object_types():
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -12,20 +12,8 @@
from hypothesis.strategies import binary
import pytest
-from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex
+from swh.model.hashutil import MultiHash, hash_to_bytes
import swh.model.hypothesis_strategies as strategies
-from swh.model.identifiers import (
- CoreSWHID,
- ExtendedSWHID,
- ObjectType,
- content_identifier,
- directory_identifier,
- origin_identifier,
- raw_extrinsic_metadata_identifier,
- release_identifier,
- revision_identifier,
- snapshot_identifier,
-)
from swh.model.model import (
BaseModel,
Content,
@@ -46,14 +34,13 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhids import CoreSWHID, ExtendedSWHID, ObjectType
from swh.model.tests.swh_model_data import TEST_OBJECTS
from swh.model.tests.test_identifiers import (
TS_DATETIMES,
TS_TIMEZONES,
- content_example,
directory_example,
metadata_example,
- origin_example,
release_example,
revision_example,
snapshot_example,
@@ -736,94 +723,6 @@
assert rev_model.extra_headers == extra_headers
-# ID computation
-
-
-def test_content_model_id_computation():
- cnt_dict = content_example.copy()
-
- cnt_id_str = hash_to_hex(content_identifier(cnt_dict)["sha1_git"])
- cnt_model = Content.from_data(cnt_dict["data"])
- assert str(cnt_model.swhid()) == "swh:1:cnt:" + cnt_id_str
-
-
-def test_directory_model_id_computation():
- dir_dict = directory_example.copy()
- del dir_dict["id"]
-
- dir_id_str = directory_identifier(dir_dict)
- dir_id = hash_to_bytes(dir_id_str)
- dir_model = Directory.from_dict(dir_dict)
- assert dir_model.id == dir_id
- assert str(dir_model.swhid()) == "swh:1:dir:" + dir_id_str
-
-
-def test_revision_model_id_computation():
- rev_dict = revision_example.copy()
- del rev_dict["id"]
-
- rev_id_str = revision_identifier(rev_dict)
- rev_id = hash_to_bytes(rev_id_str)
- rev_model = Revision.from_dict(rev_dict)
- assert rev_model.id == rev_id
- assert str(rev_model.swhid()) == "swh:1:rev:" + rev_id_str
-
-
-def test_revision_model_id_computation_with_no_date():
- """We can have revision with date to None
-
- """
- rev_dict = revision_example.copy()
- rev_dict["date"] = None
- rev_dict["committer_date"] = None
- del rev_dict["id"]
-
- rev_id = hash_to_bytes(revision_identifier(rev_dict))
- rev_model = Revision.from_dict(rev_dict)
- assert rev_model.date is None
- assert rev_model.committer_date is None
- assert rev_model.id == rev_id
-
-
-def test_release_model_id_computation():
- rel_dict = release_example.copy()
- del rel_dict["id"]
-
- rel_id_str = release_identifier(rel_dict)
- rel_id = hash_to_bytes(rel_id_str)
- rel_model = Release.from_dict(rel_dict)
- assert isinstance(rel_model.date, TimestampWithTimezone)
- assert rel_model.id == hash_to_bytes(rel_id)
- assert str(rel_model.swhid()) == "swh:1:rel:" + rel_id_str
-
-
-def test_snapshot_model_id_computation():
- snp_dict = snapshot_example.copy()
- del snp_dict["id"]
-
- snp_id_str = snapshot_identifier(snp_dict)
- snp_id = hash_to_bytes(snp_id_str)
- snp_model = Snapshot.from_dict(snp_dict)
- assert snp_model.id == snp_id
- assert str(snp_model.swhid()) == "swh:1:snp:" + snp_id_str
-
-
-def test_origin_model_id_computation():
- ori_dict = origin_example.copy()
-
- ori_id_str = origin_identifier(ori_dict)
- ori_model = Origin.from_dict(ori_dict)
- assert str(ori_model.swhid()) == "swh:1:ori:" + ori_id_str
-
-
-def test_raw_extrinsic_metadata_model_id_computation():
- emd_dict = metadata_example.copy()
-
- emd_id_str = raw_extrinsic_metadata_identifier(emd_dict)
- emd_model = RawExtrinsicMetadata.from_dict(emd_dict)
- assert str(emd_model.swhid()) == "swh:1:emd:" + emd_id_str
-
-
@given(strategies.objects(split_content=True))
def test_object_type(objtype_and_obj):
obj_type, obj = objtype_and_obj
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 9:10 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218157
Attached To
D6330: Deprecate identifiers.py
Event Timeline
Log In to Comment