Page MenuHomeSoftware Heritage

D6330.diff
No OneTemporary

D6330.diff

diff --git a/swh/model/cli.py b/swh/model/cli.py
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -26,7 +26,7 @@
swh_cli_group = click # type: ignore
from swh.model.from_disk import Directory
-from swh.model.identifiers import CoreSWHID, ObjectType
+from swh.model.swhids import CoreSWHID
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
@@ -42,7 +42,7 @@
class CoreSWHIDParamType(click.ParamType):
"""Click argument that accepts a core SWHID and returns them as
- :class:`swh.model.identifiers.CoreSWHID` instances """
+ :class:`swh.model.swhids.CoreSWHID` instances """
name = "SWHID"
@@ -87,17 +87,9 @@
def swhid_of_origin(url):
- from swh.model.hashutil import hash_to_bytes
- from swh.model.identifiers import (
- ExtendedObjectType,
- ExtendedSWHID,
- origin_identifier,
- )
+ from swh.model.model import Origin
- return ExtendedSWHID(
- object_type=ExtendedObjectType.ORIGIN,
- object_id=hash_to_bytes(origin_identifier({"url": url})),
- )
+ return Origin(url).swhid()
def swhid_of_git_repo(path) -> CoreSWHID:
@@ -110,7 +102,7 @@
)
from swh.model import hashutil
- from swh.model.identifiers import snapshot_identifier
+ from swh.model.model import Snapshot
repo = dulwich.repo.Repo(path)
@@ -133,10 +125,7 @@
snapshot = {"branches": branches}
- return CoreSWHID(
- object_type=ObjectType.SNAPSHOT,
- object_id=hashutil.hash_to_bytes(snapshot_identifier(snapshot)),
- )
+ return Snapshot.from_dict(snapshot).swhid()
def identify_object(
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -18,14 +18,10 @@
from . import model
from .exceptions import InvalidDirectoryPath
-from .hashutil import MultiHash, hash_to_bytes, hash_to_hex
-from .identifiers import (
- CoreSWHID,
- ObjectType,
- directory_entry_sort_key,
- directory_identifier,
-)
+from .git_objects import directory_entry_sort_key
+from .hashutil import MultiHash, hash_to_hex
from .merkle import MerkleLeaf, MerkleNode
+from .swhids import CoreSWHID, ObjectType
@attr.s(frozen=True, slots=True)
@@ -477,8 +473,8 @@
@property
def entries(self):
- """Child nodes, sorted by name in the same way `directory_identifier`
- does."""
+ """Child nodes, sorted by name in the same way
+ :func:`swh.model.git_objects.directory_git_object` does."""
if self.__entries is None:
self.__entries = sorted(
(
@@ -496,7 +492,7 @@
return CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=self.hash)
def compute_hash(self):
- return hash_to_bytes(directory_identifier({"entries": self.entries}))
+ return model.Directory.from_dict({"entries": self.entries}).id
def to_model(self) -> model.Directory:
"""Builds a `model.Directory` object based on this node;
diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py
--- a/swh/model/git_objects.py
+++ b/swh/model/git_objects.py
@@ -27,7 +27,7 @@
@lru_cache()
def _perms_to_bytes(perms):
- """Convert the perms value to its bytes representation"""
+ """Convert the perms value to its canonical bytes representation"""
oc = oct(perms)[2:]
return oc.encode("ascii")
@@ -117,7 +117,6 @@
UTC
- negative_utc: a boolean representing whether the offset is -0000
when offset = 0.
-
"""
if time_representation is None:
return None
@@ -126,6 +125,41 @@
def directory_git_object(directory: model.Directory) -> bytes:
+ """Formats a directory as a git tree.
+
+ A directory's identifier is the tree sha1 à la git of a directory listing,
+ using the following algorithm, which is equivalent to the git algorithm for
+ trees:
+
+ 1. Entries of the directory are sorted using the name (or the name with '/'
+ appended for directory entries) as key, in bytes order.
+
+ 2. For each entry of the directory, the following bytes are output:
+
+ - the octal representation of the permissions for the entry (stored in
+ the 'perms' member), which is a representation of the entry type:
+
+ - b'100644' (int 33188) for files
+ - b'100755' (int 33261) for executable files
+ - b'120000' (int 40960) for symbolic links
+ - b'40000' (int 16384) for directories
+ - b'160000' (int 57344) for references to revisions
+
+ - an ascii space (b'\x20')
+ - the entry's name (as raw bytes), stored in the 'name' member
+ - a null byte (b'\x00')
+ - the 20 byte long identifier of the object pointed at by the entry,
+ stored in the 'target' member:
+
+ - for files or executable files: their blob sha1_git
+ - for symbolic links: the blob sha1_git of a file containing the link
+ destination
+ - for directories: their intrinsic identifier
+ - for revisions: their intrinsic identifier
+
+ (Note that there is no separator between entries)
+
+ """
if isinstance(directory, dict):
# For backward compatibility
directory = model.Directory.from_dict(directory)
@@ -219,7 +253,6 @@
Returns:
the byte string containing the authorship data
-
"""
ret = [author.fullname]
@@ -234,8 +267,55 @@
def revision_git_object(revision: model.Revision) -> bytes:
- """Formats the git_object of a revision. See :func:`revision_identifier` for details
- on the format."""
+ """Formats a revision as a git commit.
+
+ The fields used for the revision identifier computation are:
+
+ - directory
+ - parents
+ - author
+ - author_date
+ - committer
+ - committer_date
+ - extra_headers or metadata -> extra_headers
+ - message
+
+ A revision's identifier is the 'git'-checksum of a commit manifest
+ constructed as follows (newlines are a single ASCII newline character)::
+
+ tree <directory identifier>
+ [for each parent in parents]
+ parent <parent identifier>
+ [end for each parents]
+ author <author> <author_date>
+ committer <committer> <committer_date>
+ [for each key, value in extra_headers]
+ <key> <encoded value>
+ [end for each extra_headers]
+
+ <message>
+
+ The directory identifier is the ascii representation of its hexadecimal
+ encoding.
+
+ Author and committer are formatted using the :attr:`Person.fullname` attribute only.
+ Dates are formatted with the :func:`format_offset` function.
+
+ Extra headers are an ordered list of [key, value] pairs. Keys are strings
+ and get encoded to utf-8 for identifier computation. Values are either byte
+ strings, unicode strings (that get encoded to utf-8), or integers (that get
+ encoded to their utf-8 decimal representation).
+
+ Multiline extra header values are escaped by indenting the continuation
+ lines with one ascii space.
+
+ If the message is None, the manifest ends with the last header. Else, the
+ message is appended to the headers after an empty line.
+
+ The checksum of the full manifest is computed using the 'commit' git object
+ type.
+
+ """
if isinstance(revision, dict):
# For backward compatibility
revision = model.Revision.from_dict(revision)
@@ -290,8 +370,50 @@
def snapshot_git_object(snapshot: model.Snapshot) -> bytes:
- """Formats the git_object of a revision. See :func:`snapshot_identifier` for details
- on the format."""
+ """Formats a snapshot as a git-like object.
+
+ Snapshots are a set of named branches, which are pointers to objects at any
+ level of the Software Heritage DAG.
+
+ As well as pointing to other objects in the Software Heritage DAG, branches
+ can also be *alias*es, in which case their target is the name of another
+ branch in the same snapshot, or *dangling*, in which case the target is
+ unknown (and represented by the ``None`` value).
+
+ A snapshot identifier is a salted sha1 (using the git hashing algorithm
+ with the ``snapshot`` object type) of a manifest following the algorithm:
+
+ 1. Branches are sorted using the name as key, in bytes order.
+
+ 2. For each branch, the following bytes are output:
+
+ - the type of the branch target:
+
+ - ``content``, ``directory``, ``revision``, ``release`` or ``snapshot``
+ for the corresponding entries in the DAG;
+ - ``alias`` for branches referencing another branch;
+ - ``dangling`` for dangling branches
+
+ - an ascii space (``\\x20``)
+ - the branch name (as raw bytes)
+ - a null byte (``\\x00``)
+ - the length of the target identifier, as an ascii-encoded decimal number
+ (``20`` for current intrinsic identifiers, ``0`` for dangling
+ branches, the length of the target branch name for branch aliases)
+ - a colon (``:``)
+ - the identifier of the target object pointed at by the branch,
+ stored in the 'target' member:
+
+ - for contents: their *sha1_git*
+ - for directories, revisions, releases or snapshots: their intrinsic
+ identifier
+ - for branch aliases, the name of the target branch (as raw bytes)
+ - for dangling branches, the empty string
+
+ Note that, akin to directory manifests, there is no separator between
+ entries. Because of symbolic branches, identifiers are of arbitrary
+ length but are length-encoded to avoid ambiguity.
+ """
if isinstance(snapshot, dict):
# For backward compatibility
snapshot = model.Snapshot.from_dict(snapshot)
@@ -334,9 +456,47 @@
def raw_extrinsic_metadata_git_object(metadata: model.RawExtrinsicMetadata) -> bytes:
- """Formats the git_object of a raw_extrinsic_metadata object.
- See :func:`raw_extrinsic_metadata_identifier` for details
- on the format."""
+ """Formats RawExtrinsicMetadata as a git-like object.
+
+ A raw_extrinsic_metadata identifier is a salted sha1 (using the git
+ hashing algorithm with the ``raw_extrinsic_metadata`` object type) of
+ a manifest following the format::
+
+ target $ExtendedSwhid
+ discovery_date $Timestamp
+ authority $StrWithoutSpaces $IRI
+ fetcher $Str $Version
+ format $StrWithoutSpaces
+ origin $IRI <- optional
+ visit $IntInDecimal <- optional
+ snapshot $CoreSwhid <- optional
+ release $CoreSwhid <- optional
+ revision $CoreSwhid <- optional
+ path $Bytes <- optional
+ directory $CoreSwhid <- optional
+
+ $MetadataBytes
+
+ $IRI must be RFC 3987 IRIs (so they may contain newlines, that are escaped as
+ described below)
+
+ $StrWithoutSpaces and $Version are ASCII strings, and may not contain spaces.
+
+ $Str is a UTF-8 string.
+
+ $CoreSwhid are core SWHIDs, as defined in :ref:`persistent-identifiers`.
+ $ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
+ origins and 'emd' for raw extrinsic metadata)
+
+ $Timestamp is a decimal representation of the rounded-down integer number of
+ seconds since the UNIX epoch (1970-01-01 00:00:00 UTC),
+ with no leading '0' (unless the timestamp value is zero) and no timezone.
+ It may be negative by prefixing it with a '-', which must not be followed
+ by a '0'.
+
+ Newlines in $Bytes, $Str, and $IRI are escaped as with other git fields,
+ ie. by adding a space after them.
+ """
if isinstance(metadata, dict):
# For backward compatibility
metadata = model.RawExtrinsicMetadata.from_dict(metadata)
@@ -389,6 +549,26 @@
def extid_git_object(extid: model.ExtID) -> bytes:
+ """Formats an extid as a git-like object.
+
+ An ExtID identifier is a salted sha1 (using the git hashing algorithm with
+ the ``extid`` object type) of a manifest following the format:
+
+ ```
+ extid_type $StrWithoutSpaces
+ [extid_version $Str]
+ extid $Bytes
+ target $CoreSwhid
+ ```
+
+ $StrWithoutSpaces is an ASCII string, and may not contain spaces.
+
+ Newlines in $Bytes are escaped as with other git fields, ie. by adding a
+ space after them.
+
+ The extid_version line is only generated if the version is non-zero.
+ """
+
headers = [
(b"extid_type", extid.extid_type.encode("ascii")),
]
diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py
--- a/swh/model/hypothesis_strategies.py
+++ b/swh/model/hypothesis_strategies.py
@@ -29,8 +29,6 @@
)
from .from_disk import DentryPerms
-from .hashutil import hash_to_bytes
-from .identifiers import ExtendedObjectType, ExtendedSWHID, snapshot_identifier
from .model import (
BaseContent,
Content,
@@ -54,6 +52,7 @@
Timestamp,
TimestampWithTimezone,
)
+from .swhids import ExtendedObjectType, ExtendedSWHID
pgsql_alphabet = characters(
blacklist_categories=("Cs",), blacklist_characters=["\u0000"]
@@ -396,7 +395,7 @@
# Ensure no cycles between aliases
while True:
try:
- id_ = snapshot_identifier(
+ snapshot = Snapshot.from_dict(
{
"branches": {
name: branch or None for (name, branch) in branches.items()
@@ -409,7 +408,7 @@
else:
break
- return dict(id=hash_to_bytes(id_), branches=branches)
+ return snapshot.to_dict()
def snapshots(*, min_size=0, max_size=100, only_objects=False):
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -3,9 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from __future__ import annotations
-
from typing import Any, Dict
+import warnings
from . import model
@@ -16,6 +15,14 @@
# Reexport for backward compatibility
from .swhids import * # noqa
+warnings.warn(
+ "The swh.model.identifiers module is deprecated. "
+ "SWHID-related classes were moved to swh.model.swhids, and identifier "
+ "computation is now done directly with swh.model.model classes.",
+ DeprecationWarning,
+ stacklevel=2,
+)
+
# The following are deprecated aliases of the variants defined in ObjectType
# while transitioning from SWHID to QualifiedSWHID
ORIGIN = "origin"
@@ -28,260 +35,57 @@
def content_identifier(content: Dict[str, Any]) -> Dict[str, bytes]:
- """Return the intrinsic identifier for a content.
-
- A content's identifier is the sha1, sha1_git and sha256 checksums of its
- data.
-
- Args:
- content: a content conforming to the Software Heritage schema
-
- Returns:
- A dictionary with all the hashes for the data
-
- Raises:
- KeyError: if the content doesn't have a data member.
-
+ """Deprecated, use :class:`swh.model.model.Content` instead:
+ ``content_identifier(d)`` is equivalent to:
+ ``Content.from_data(d["data"]).hashes()``
"""
-
return MultiHash.from_data(content["data"]).digest()
def directory_identifier(directory: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a directory.
-
- A directory's identifier is the tree sha1 à la git of a directory listing,
- using the following algorithm, which is equivalent to the git algorithm for
- trees:
+ """Deprecated, use :class:`swh.model.model.Directory` instead:
+ ``directory_identifier(d)`` is equivalent to:
+ ``hash_to_hex(Directory.from_dict(d).id)``.
- 1. Entries of the directory are sorted using the name (or the name with '/'
- appended for directory entries) as key, in bytes order.
-
- 2. For each entry of the directory, the following bytes are output:
-
- - the octal representation of the permissions for the entry (stored in
- the 'perms' member), which is a representation of the entry type:
-
- - b'100644' (int 33188) for files
- - b'100755' (int 33261) for executable files
- - b'120000' (int 40960) for symbolic links
- - b'40000' (int 16384) for directories
- - b'160000' (int 57344) for references to revisions
-
- - an ascii space (b'\x20')
- - the entry's name (as raw bytes), stored in the 'name' member
- - a null byte (b'\x00')
- - the 20 byte long identifier of the object pointed at by the entry,
- stored in the 'target' member:
-
- - for files or executable files: their blob sha1_git
- - for symbolic links: the blob sha1_git of a file containing the link
- destination
- - for directories: their intrinsic identifier
- - for revisions: their intrinsic identifier
-
- (Note that there is no separator between entries)
-
- """
+ See :func:`swh.model.git_objects.directory_git_object` for details of the
+ format used to generate this identifier."""
return hash_to_hex(model.Directory.from_dict(directory).id)
def revision_identifier(revision: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a revision.
-
- The fields used for the revision identifier computation are:
+ """Deprecated, use :class:`swh.model.model.Revision` instead:
+ ``revision_identifier(d)`` is equivalent to:
+ ``hash_to_hex(Revision.from_dict(d).id)``.
- - directory
- - parents
- - author
- - author_date
- - committer
- - committer_date
- - extra_headers or metadata -> extra_headers
- - message
-
- A revision's identifier is the 'git'-checksum of a commit manifest
- constructed as follows (newlines are a single ASCII newline character)::
-
- tree <directory identifier>
- [for each parent in parents]
- parent <parent identifier>
- [end for each parents]
- author <author> <author_date>
- committer <committer> <committer_date>
- [for each key, value in extra_headers]
- <key> <encoded value>
- [end for each extra_headers]
-
- <message>
-
- The directory identifier is the ascii representation of its hexadecimal
- encoding.
-
- Author and committer are formatted using the :attr:`Person.fullname` attribute only.
- Dates are formatted with the :func:`format_offset` function.
-
- Extra headers are an ordered list of [key, value] pairs. Keys are strings
- and get encoded to utf-8 for identifier computation. Values are either byte
- strings, unicode strings (that get encoded to utf-8), or integers (that get
- encoded to their utf-8 decimal representation).
-
- Multiline extra header values are escaped by indenting the continuation
- lines with one ascii space.
-
- If the message is None, the manifest ends with the last header. Else, the
- message is appended to the headers after an empty line.
-
- The checksum of the full manifest is computed using the 'commit' git object
- type.
-
- """
+ See :func:`swh.model.git_objects.revision_git_object` for details of the
+ format used to generate this identifier."""
return hash_to_hex(model.Revision.from_dict(revision).id)
def release_identifier(release: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a release."""
+ """Deprecated, use :class:`swh.model.model.Release` instead:
+ ``release_identifier(d)`` is equivalent to:
+ ``hash_to_hex(Release.from_dict(d).id)``.
+
+ See :func:`swh.model.git_objects.release_git_object` for details of the
+ format used to generate this identifier."""
return hash_to_hex(model.Release.from_dict(release).id)
def snapshot_identifier(snapshot: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a snapshot.
-
- Snapshots are a set of named branches, which are pointers to objects at any
- level of the Software Heritage DAG.
-
- As well as pointing to other objects in the Software Heritage DAG, branches
- can also be *alias*es, in which case their target is the name of another
- branch in the same snapshot, or *dangling*, in which case the target is
- unknown (and represented by the ``None`` value).
-
- A snapshot identifier is a salted sha1 (using the git hashing algorithm
- with the ``snapshot`` object type) of a manifest following the algorithm:
-
- 1. Branches are sorted using the name as key, in bytes order.
-
- 2. For each branch, the following bytes are output:
-
- - the type of the branch target:
+ """Deprecated, use :class:`swh.model.model.Snapshot` instead:
+ ``snapshot_identifier(d)`` is equivalent to:
+ ``hash_to_hex(Snapshot.from_dict(d).id)``.
- - ``content``, ``directory``, ``revision``, ``release`` or ``snapshot``
- for the corresponding entries in the DAG;
- - ``alias`` for branches referencing another branch;
- - ``dangling`` for dangling branches
-
- - an ascii space (``\\x20``)
- - the branch name (as raw bytes)
- - a null byte (``\\x00``)
- - the length of the target identifier, as an ascii-encoded decimal number
- (``20`` for current intrinsic identifiers, ``0`` for dangling
- branches, the length of the target branch name for branch aliases)
- - a colon (``:``)
- - the identifier of the target object pointed at by the branch,
- stored in the 'target' member:
-
- - for contents: their *sha1_git*
- - for directories, revisions, releases or snapshots: their intrinsic
- identifier
- - for branch aliases, the name of the target branch (as raw bytes)
- - for dangling branches, the empty string
-
- Note that, akin to directory manifests, there is no separator between
- entries. Because of symbolic branches, identifiers are of arbitrary
- length but are length-encoded to avoid ambiguity.
-
- Args:
- snapshot (dict): the snapshot of which to compute the identifier. A
- single entry is needed, ``'branches'``, which is itself a :class:`dict`
- mapping each branch to its target
-
- Returns:
- str: the intrinsic identifier for `snapshot`
-
- """
+ See :func:`swh.model.git_objects.snapshot_git_object` for details of the
+ format used to generate this identifier."""
return hash_to_hex(model.Snapshot.from_dict(snapshot).id)
def origin_identifier(origin):
- """Return the intrinsic identifier for an origin.
-
- An origin's identifier is the sha1 checksum of the entire origin URL
-
+ """Deprecated, use :class:`swh.model.model.Origin` instead:
+ ``origin_identifier({"url": url})`` is equivalent to:
+ ``hash_to_hex(Origin(url=url).id)``.
"""
- return hash_to_hex(model.Origin.from_dict(origin).id)
-
-
-def raw_extrinsic_metadata_identifier(metadata: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for a RawExtrinsicMetadata object.
-
- A raw_extrinsic_metadata identifier is a salted sha1 (using the git
- hashing algorithm with the ``raw_extrinsic_metadata`` object type) of
- a manifest following the format::
-
- target $ExtendedSwhid
- discovery_date $Timestamp
- authority $StrWithoutSpaces $IRI
- fetcher $Str $Version
- format $StrWithoutSpaces
- origin $IRI <- optional
- visit $IntInDecimal <- optional
- snapshot $CoreSwhid <- optional
- release $CoreSwhid <- optional
- revision $CoreSwhid <- optional
- path $Bytes <- optional
- directory $CoreSwhid <- optional
-
- $MetadataBytes
-
- $IRI must be RFC 3987 IRIs (so they may contain newlines, that are escaped as
- described below)
-
- $StrWithoutSpaces and $Version are ASCII strings, and may not contain spaces.
-
- $Str is an UTF-8 string.
- $CoreSwhid are core SWHIDs, as defined in :ref:`persistent-identifiers`.
- $ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
- origins and 'emd' for raw extrinsic metadata)
-
- $Timestamp is a decimal representation of the rounded-down integer number of
- seconds since the UNIX epoch (1970-01-01 00:00:00 UTC),
- with no leading '0' (unless the timestamp value is zero) and no timezone.
- It may be negative by prefixing it with a '-', which must not be followed
- by a '0'.
-
- Newlines in $Bytes, $Str, and $Iri are escaped as with other git fields,
- ie. by adding a space after them.
-
- Returns:
- str: the intrinsic identifier for ``metadata``
-
- """
- return hash_to_hex(model.RawExtrinsicMetadata.from_dict(metadata).id)
-
-
-def extid_identifier(extid: Dict[str, Any]) -> str:
- """Return the intrinsic identifier for an ExtID object.
-
- An ExtID identifier is a salted sha1 (using the git hashing algorithm with
- the ``extid`` object type) of a manifest following the format:
-
- ```
- extid_type $StrWithoutSpaces
- [extid_version $Str]
- extid $Bytes
- target $CoreSwhid
- ```
-
- $StrWithoutSpaces is an ASCII string, and may not contain spaces.
-
- Newlines in $Bytes are escaped as with other git fields, ie. by adding a
- space after them.
-
- The extid_version line is only generated if the version is non-zero.
-
- Returns:
- str: the intrinsic identifier for `extid`
-
- """
-
- return hash_to_hex(model.ExtID.from_dict(extid).id)
+ return hash_to_hex(model.Origin.from_dict(origin).id)
diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
--- a/swh/model/tests/swh_model_data.py
+++ b/swh/model/tests/swh_model_data.py
@@ -9,7 +9,6 @@
import attr
from swh.model.hashutil import MultiHash, hash_to_bytes
-from swh.model.identifiers import ExtendedSWHID
from swh.model.model import (
BaseModel,
Content,
@@ -35,6 +34,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhids import ExtendedSWHID
UTC = datetime.timezone.utc
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -12,18 +12,27 @@
import attr
import pytest
-from swh.model import hashutil, identifiers
+from swh.model import git_objects, hashutil
from swh.model.exceptions import ValidationError
from swh.model.hashutil import hash_to_bytes as _x
-from swh.model.hashutil import hash_to_hex
-from swh.model.identifiers import (
+from swh.model.model import (
+ Content,
+ Directory,
+ ExtID,
+ Origin,
+ RawExtrinsicMetadata,
+ Release,
+ Revision,
+ Snapshot,
+ TimestampWithTimezone,
+)
+from swh.model.swhids import (
SWHID_QUALIFIERS,
CoreSWHID,
ExtendedObjectType,
ExtendedSWHID,
ObjectType,
QualifiedSWHID,
- normalize_timestamp,
)
@@ -51,11 +60,11 @@
def test_format_date(self):
for date_repr, date in self.dates.items():
- self.assertEqual(identifiers.format_date(date), date_repr)
+ self.assertEqual(git_objects.format_date(date), date_repr)
def test_format_offset(self):
for offset, res in self.offsets.items():
- self.assertEqual(identifiers.format_offset(offset), res)
+ self.assertEqual(git_objects.format_offset(offset), res)
content_example = {
@@ -72,7 +81,7 @@
def test_content_identifier(self):
self.assertEqual(
- identifiers.content_identifier(content_example), self.content_id
+ Content.from_data(content_example["data"]).hashes(), self.content_id
)
@@ -199,26 +208,22 @@
}
def test_dir_identifier(self):
+ self.assertEqual(Directory.from_dict(self.directory).id, self.directory["id"])
self.assertEqual(
- _x(identifiers.directory_identifier(self.directory)), self.directory["id"]
- )
- self.assertEqual(
- _x(identifiers.directory_identifier(remove_id(self.directory))),
- self.directory["id"],
+ Directory.from_dict(remove_id(self.directory)).id, self.directory["id"],
)
def test_dir_identifier_entry_order(self):
# Reverse order of entries, check the id is still the same.
directory = {"entries": reversed(self.directory["entries"])}
self.assertEqual(
- _x(identifiers.directory_identifier(remove_id(directory))),
- self.directory["id"],
+ Directory.from_dict(remove_id(directory)).id, self.directory["id"],
)
def test_dir_identifier_empty_directory(self):
self.assertEqual(
- identifiers.directory_identifier(remove_id(self.empty_directory)),
- self.empty_directory["id"],
+ Directory.from_dict(remove_id(self.empty_directory)).id,
+ _x(self.empty_directory["id"]),
)
@@ -270,7 +275,7 @@
self.revision = revision_example
self.revision_none_metadata = {
- "id": "bc0195aad0daa2ad5b0d76cce22b167bc3435590",
+ "id": _x("bc0195aad0daa2ad5b0d76cce22b167bc3435590"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {
@@ -328,7 +333,7 @@
# cat commit.txt | git hash-object -t commit --stdin
self.revision_with_extra_headers = {
- "id": "010d34f384fa99d047cdd5e2f41e56e5c2feee45",
+ "id": _x("010d34f384fa99d047cdd5e2f41e56e5c2feee45"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {
@@ -355,7 +360,7 @@
}
self.revision_with_gpgsig = {
- "id": "44cc742a8ca17b9c279be4cc195a93a6ef7a320e",
+ "id": _x("44cc742a8ca17b9c279be4cc195a93a6ef7a320e"),
"directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
"parents": [
_x("689664ae944b4692724f13b709a4e4de28b54e57"),
@@ -380,7 +385,7 @@
}
self.revision_no_message = {
- "id": "4cfc623c9238fa92c832beed000ce2d003fd8333",
+ "id": _x("4cfc623c9238fa92c832beed000ce2d003fd8333"),
"directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
"parents": [
_x("689664ae944b4692724f13b709a4e4de28b54e57"),
@@ -400,7 +405,7 @@
}
self.revision_empty_message = {
- "id": "7442cd78bd3b4966921d6a7f7447417b7acb15eb",
+ "id": _x("7442cd78bd3b4966921d6a7f7447417b7acb15eb"),
"directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
"parents": [
_x("689664ae944b4692724f13b709a4e4de28b54e57"),
@@ -420,7 +425,7 @@
}
self.revision_only_fullname = {
- "id": "010d34f384fa99d047cdd5e2f41e56e5c2feee45",
+ "id": _x("010d34f384fa99d047cdd5e2f41e56e5c2feee45"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",},
@@ -442,56 +447,52 @@
def test_revision_identifier(self):
self.assertEqual(
- identifiers.revision_identifier(self.revision),
- hash_to_hex(self.revision["id"]),
+ Revision.from_dict(self.revision).id, self.revision["id"],
)
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision)),
- hash_to_hex(self.revision["id"]),
+ Revision.from_dict(remove_id(self.revision)).id, self.revision["id"],
)
def test_revision_identifier_none_metadata(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_none_metadata)),
- hash_to_hex(self.revision_none_metadata["id"]),
+ Revision.from_dict(remove_id(self.revision_none_metadata)).id,
+ self.revision_none_metadata["id"],
)
def test_revision_identifier_synthetic(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.synthetic_revision)),
- hash_to_hex(self.synthetic_revision["id"]),
+ Revision.from_dict(remove_id(self.synthetic_revision)).id,
+ self.synthetic_revision["id"],
)
def test_revision_identifier_with_extra_headers(self):
self.assertEqual(
- identifiers.revision_identifier(
- remove_id(self.revision_with_extra_headers)
- ),
- hash_to_hex(self.revision_with_extra_headers["id"]),
+ Revision.from_dict(remove_id(self.revision_with_extra_headers)).id,
+ self.revision_with_extra_headers["id"],
)
def test_revision_identifier_with_gpgsig(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_with_gpgsig)),
- hash_to_hex(self.revision_with_gpgsig["id"]),
+ Revision.from_dict(remove_id(self.revision_with_gpgsig)).id,
+ self.revision_with_gpgsig["id"],
)
def test_revision_identifier_no_message(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_no_message)),
- hash_to_hex(self.revision_no_message["id"]),
+ Revision.from_dict(remove_id(self.revision_no_message)).id,
+ self.revision_no_message["id"],
)
def test_revision_identifier_empty_message(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_empty_message)),
- hash_to_hex(self.revision_empty_message["id"]),
+ Revision.from_dict(remove_id(self.revision_empty_message)).id,
+ self.revision_empty_message["id"],
)
def test_revision_identifier_only_fullname(self):
self.assertEqual(
- identifiers.revision_identifier(remove_id(self.revision_only_fullname)),
- hash_to_hex(self.revision_only_fullname["id"]),
+ Revision.from_dict(remove_id(self.revision_only_fullname)).id,
+ self.revision_only_fullname["id"],
)
@@ -608,48 +609,46 @@
def test_release_identifier(self):
self.assertEqual(
- identifiers.release_identifier(self.release),
- hash_to_hex(self.release["id"]),
+ Release.from_dict(self.release).id, self.release["id"],
)
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release)),
- hash_to_hex(self.release["id"]),
+ Release.from_dict(remove_id(self.release)).id, self.release["id"],
)
def test_release_identifier_no_author(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_no_author)),
- hash_to_hex(self.release_no_author["id"]),
+ Release.from_dict(remove_id(self.release_no_author)).id,
+ self.release_no_author["id"],
)
def test_release_identifier_no_message(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_no_message)),
- hash_to_hex(self.release_no_message["id"]),
+ Release.from_dict(remove_id(self.release_no_message)).id,
+ self.release_no_message["id"],
)
def test_release_identifier_empty_message(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_empty_message)),
- hash_to_hex(self.release_empty_message["id"]),
+ Release.from_dict(remove_id(self.release_empty_message)).id,
+ self.release_empty_message["id"],
)
def test_release_identifier_negative_utc(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_negative_utc)),
- hash_to_hex(self.release_negative_utc["id"]),
+ Release.from_dict(remove_id(self.release_negative_utc)).id,
+ self.release_negative_utc["id"],
)
def test_release_identifier_newline_in_author(self):
self.assertEqual(
- identifiers.release_identifier(remove_id(self.release_newline_in_author)),
- hash_to_hex(self.release_newline_in_author["id"]),
+ Release.from_dict(remove_id(self.release_newline_in_author)).id,
+ self.release_newline_in_author["id"],
)
def test_release_identifier_snapshot_target(self):
self.assertEqual(
- identifiers.release_identifier(self.release_snapshot_target),
- hash_to_hex(self.release_snapshot_target["id"]),
+ Release.from_dict(self.release_snapshot_target).id,
+ self.release_snapshot_target["id"],
)
@@ -687,17 +686,17 @@
super().setUp()
self.empty = {
- "id": "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e",
+ "id": _x("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
"branches": {},
}
self.dangling_branch = {
- "id": "c84502e821eb21ed84e9fd3ec40973abc8b32353",
+ "id": _x("c84502e821eb21ed84e9fd3ec40973abc8b32353"),
"branches": {b"HEAD": None,},
}
self.unresolved = {
- "id": "84b4548ea486e4b0a7933fa541ff1503a0afe1e0",
+ "id": _x("84b4548ea486e4b0a7933fa541ff1503a0afe1e0"),
"branches": {b"foo": {"target": b"bar", "target_type": "alias",},},
}
@@ -705,24 +704,22 @@
def test_empty_snapshot(self):
self.assertEqual(
- identifiers.snapshot_identifier(remove_id(self.empty)),
- hash_to_hex(self.empty["id"]),
+ Snapshot.from_dict(remove_id(self.empty)).id, self.empty["id"],
)
def test_dangling_branch(self):
self.assertEqual(
- identifiers.snapshot_identifier(remove_id(self.dangling_branch)),
- hash_to_hex(self.dangling_branch["id"]),
+ Snapshot.from_dict(remove_id(self.dangling_branch)).id,
+ self.dangling_branch["id"],
)
def test_unresolved(self):
with self.assertRaisesRegex(ValueError, "b'foo' -> b'bar'"):
- identifiers.snapshot_identifier(remove_id(self.unresolved))
+ Snapshot.from_dict(remove_id(self.unresolved))
def test_all_types(self):
self.assertEqual(
- identifiers.snapshot_identifier(remove_id(self.all_types)),
- hash_to_hex(self.all_types["id"]),
+ Snapshot.from_dict(remove_id(self.all_types)).id, self.all_types["id"],
)
@@ -775,15 +772,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.minimal), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.minimal)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_maximal(self):
@@ -806,15 +806,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.maximal), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.maximal)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.maximal),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(self.maximal).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.maximal),
- "f96966e1093d15236a31fde07e47d5b1c9428049",
+ RawExtrinsicMetadata.from_dict(self.maximal).id,
+ _x("f96966e1093d15236a31fde07e47d5b1c9428049"),
)
def test_nonascii_path(self):
@@ -836,15 +839,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "7cc83fd1912176510c083f5df43f01b09af4b333",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("7cc83fd1912176510c083f5df43f01b09af4b333"),
)
def test_timezone_insensitive(self):
@@ -859,16 +865,20 @@
}
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.minimal),
- identifiers.raw_extrinsic_metadata_git_object(metadata),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.minimal)
+ ),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- identifiers.raw_extrinsic_metadata_identifier(metadata),
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ RawExtrinsicMetadata.from_dict(metadata).id,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_microsecond_insensitive(self):
@@ -882,16 +892,20 @@
}
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.minimal),
- identifiers.raw_extrinsic_metadata_git_object(metadata),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.minimal)
+ ),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- identifiers.raw_extrinsic_metadata_identifier(metadata),
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ RawExtrinsicMetadata.from_dict(metadata).id,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_noninteger_timezone(self):
@@ -906,16 +920,20 @@
}
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(self.minimal),
- identifiers.raw_extrinsic_metadata_git_object(metadata),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(self.minimal)
+ ),
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(self.minimal),
- identifiers.raw_extrinsic_metadata_identifier(metadata),
+ RawExtrinsicMetadata.from_dict(self.minimal).id,
+ RawExtrinsicMetadata.from_dict(metadata).id,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_negative_timestamp(self):
@@ -938,15 +956,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "895d0821a2991dd376ddc303424aceb7c68280f9",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("895d0821a2991dd376ddc303424aceb7c68280f9"),
)
def test_epoch(self):
@@ -969,15 +990,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "27a53df54ace35ebd910493cdc70b334d6b7cb88",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("27a53df54ace35ebd910493cdc70b334d6b7cb88"),
)
def test_negative_epoch(self):
@@ -1000,15 +1024,18 @@
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+ git_objects.raw_extrinsic_metadata_git_object(
+ RawExtrinsicMetadata.from_dict(metadata)
+ ),
+ git_object,
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- hashlib.sha1(git_object).hexdigest(),
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ hashlib.sha1(git_object).digest(),
)
self.assertEqual(
- identifiers.raw_extrinsic_metadata_identifier(metadata),
- "be7154a8fd49d87f81547ea634d1e2152907d089",
+ RawExtrinsicMetadata.from_dict(metadata).id,
+ _x("be7154a8fd49d87f81547ea634d1e2152907d089"),
)
@@ -1020,8 +1047,8 @@
class OriginIdentifier(unittest.TestCase):
def test_content_identifier(self):
self.assertEqual(
- identifiers.origin_identifier(origin_example),
- "b63a575fe3faab7692c9f38fb09d4bb45651bb0f",
+ Origin.from_dict(origin_example).id,
+ _x("b63a575fe3faab7692c9f38fb09d4bb45651bb0f"),
)
@@ -1111,7 +1138,7 @@
@pytest.mark.parametrize("dict_input,expected", TS_DICTS)
def test_normalize_timestamp_dict(dict_input, expected):
- assert normalize_timestamp(dict_input) == expected
+ assert TimestampWithTimezone.from_dict(dict_input).to_dict() == expected
TS_DICTS_INVALID_TIMESTAMP = [
@@ -1127,7 +1154,7 @@
@pytest.mark.parametrize("dict_input", TS_DICTS_INVALID_TIMESTAMP)
def test_normalize_timestamp_dict_invalid_timestamp(dict_input):
with pytest.raises(ValueError, match="non-integer timestamp"):
- normalize_timestamp(dict_input)
+ TimestampWithTimezone.from_dict(dict_input)
UTC = datetime.timezone.utc
@@ -1152,7 +1179,7 @@
@pytest.mark.parametrize("microsecond", [0, 1, 10, 100, 1000, 999999])
def test_normalize_timestamp_datetime(date, seconds, tz, offset, microsecond):
date = date.astimezone(tz).replace(microsecond=microsecond)
- assert normalize_timestamp(date) == {
+ assert TimestampWithTimezone.from_dict(date).to_dict() == {
"timestamp": {"seconds": seconds, "microseconds": microsecond},
"offset": offset,
"negative_utc": False,
@@ -1776,18 +1803,19 @@
"target": "swh:1:dir:" + "00" * 20,
}
- assert (
- identifiers.extid_identifier(extid_dict)
- == "b9295e1931c31e40a7e3e1e967decd1c89426455"
+ assert ExtID.from_dict(extid_dict).id == _x(
+ "b9295e1931c31e40a7e3e1e967decd1c89426455"
)
- assert identifiers.extid_identifier(
- {**extid_dict, "extid_version": 0}
- ) == identifiers.extid_identifier(extid_dict)
+ assert (
+ ExtID.from_dict({**extid_dict, "extid_version": 0}).id
+ == ExtID.from_dict(extid_dict).id
+ )
- assert identifiers.extid_identifier(
- {**extid_dict, "extid_version": 1}
- ) != identifiers.extid_identifier(extid_dict)
+ assert (
+ ExtID.from_dict({**extid_dict, "extid_version": 1}).id
+ != ExtID.from_dict(extid_dict).id
+ )
def test_object_types():
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -12,20 +12,8 @@
from hypothesis.strategies import binary
import pytest
-from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex
+from swh.model.hashutil import MultiHash, hash_to_bytes
import swh.model.hypothesis_strategies as strategies
-from swh.model.identifiers import (
- CoreSWHID,
- ExtendedSWHID,
- ObjectType,
- content_identifier,
- directory_identifier,
- origin_identifier,
- raw_extrinsic_metadata_identifier,
- release_identifier,
- revision_identifier,
- snapshot_identifier,
-)
from swh.model.model import (
BaseModel,
Content,
@@ -46,14 +34,13 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhids import CoreSWHID, ExtendedSWHID, ObjectType
from swh.model.tests.swh_model_data import TEST_OBJECTS
from swh.model.tests.test_identifiers import (
TS_DATETIMES,
TS_TIMEZONES,
- content_example,
directory_example,
metadata_example,
- origin_example,
release_example,
revision_example,
snapshot_example,
@@ -736,94 +723,6 @@
assert rev_model.extra_headers == extra_headers
-# ID computation
-
-
-def test_content_model_id_computation():
- cnt_dict = content_example.copy()
-
- cnt_id_str = hash_to_hex(content_identifier(cnt_dict)["sha1_git"])
- cnt_model = Content.from_data(cnt_dict["data"])
- assert str(cnt_model.swhid()) == "swh:1:cnt:" + cnt_id_str
-
-
-def test_directory_model_id_computation():
- dir_dict = directory_example.copy()
- del dir_dict["id"]
-
- dir_id_str = directory_identifier(dir_dict)
- dir_id = hash_to_bytes(dir_id_str)
- dir_model = Directory.from_dict(dir_dict)
- assert dir_model.id == dir_id
- assert str(dir_model.swhid()) == "swh:1:dir:" + dir_id_str
-
-
-def test_revision_model_id_computation():
- rev_dict = revision_example.copy()
- del rev_dict["id"]
-
- rev_id_str = revision_identifier(rev_dict)
- rev_id = hash_to_bytes(rev_id_str)
- rev_model = Revision.from_dict(rev_dict)
- assert rev_model.id == rev_id
- assert str(rev_model.swhid()) == "swh:1:rev:" + rev_id_str
-
-
-def test_revision_model_id_computation_with_no_date():
- """We can have revision with date to None
-
- """
- rev_dict = revision_example.copy()
- rev_dict["date"] = None
- rev_dict["committer_date"] = None
- del rev_dict["id"]
-
- rev_id = hash_to_bytes(revision_identifier(rev_dict))
- rev_model = Revision.from_dict(rev_dict)
- assert rev_model.date is None
- assert rev_model.committer_date is None
- assert rev_model.id == rev_id
-
-
-def test_release_model_id_computation():
- rel_dict = release_example.copy()
- del rel_dict["id"]
-
- rel_id_str = release_identifier(rel_dict)
- rel_id = hash_to_bytes(rel_id_str)
- rel_model = Release.from_dict(rel_dict)
- assert isinstance(rel_model.date, TimestampWithTimezone)
- assert rel_model.id == hash_to_bytes(rel_id)
- assert str(rel_model.swhid()) == "swh:1:rel:" + rel_id_str
-
-
-def test_snapshot_model_id_computation():
- snp_dict = snapshot_example.copy()
- del snp_dict["id"]
-
- snp_id_str = snapshot_identifier(snp_dict)
- snp_id = hash_to_bytes(snp_id_str)
- snp_model = Snapshot.from_dict(snp_dict)
- assert snp_model.id == snp_id
- assert str(snp_model.swhid()) == "swh:1:snp:" + snp_id_str
-
-
-def test_origin_model_id_computation():
- ori_dict = origin_example.copy()
-
- ori_id_str = origin_identifier(ori_dict)
- ori_model = Origin.from_dict(ori_dict)
- assert str(ori_model.swhid()) == "swh:1:ori:" + ori_id_str
-
-
-def test_raw_extrinsic_metadata_model_id_computation():
- emd_dict = metadata_example.copy()
-
- emd_id_str = raw_extrinsic_metadata_identifier(emd_dict)
- emd_model = RawExtrinsicMetadata.from_dict(emd_dict)
- assert str(emd_model.swhid()) == "swh:1:emd:" + emd_id_str
-
-
@given(strategies.objects(split_content=True))
def test_object_type(objtype_and_obj):
obj_type, obj = objtype_and_obj

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 9:10 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218157

Event Timeline