diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py --- a/swh/model/from_disk.py +++ b/swh/model/from_disk.py @@ -18,15 +18,13 @@ from . import model from .exceptions import InvalidDirectoryPath -from .hashutil import MultiHash +from .hashutil import MultiHash, hash_to_bytes, hash_to_hex from .identifiers import ( CoreSWHID, ObjectType, directory_entry_sort_key, directory_identifier, ) -from .identifiers import identifier_to_bytes as id_to_bytes -from .identifiers import identifier_to_str as id_to_str from .merkle import MerkleLeaf, MerkleNode @@ -218,7 +216,7 @@ return CoreSWHID(object_type=ObjectType.CONTENT, object_id=self.hash) def __repr__(self): - return "Content(id=%s)" % id_to_str(self.hash) + return "Content(id=%s)" % hash_to_hex(self.hash) def compute_hash(self): return self.data["sha1_git"] @@ -498,7 +496,7 @@ return CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=self.hash) def compute_hash(self): - return id_to_bytes(directory_identifier({"entries": self.entries})) + return hash_to_bytes(directory_identifier({"entries": self.entries})) def to_model(self) -> model.Directory: """Builds a `model.Directory` object based on this node; @@ -550,6 +548,6 @@ def __repr__(self): return "Directory(id=%s, entries=[%s])" % ( - id_to_str(self.hash), + hash_to_hex(self.hash), ", ".join(str(entry) for entry in self), ) diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -29,12 +29,8 @@ ) from .from_disk import DentryPerms -from .identifiers import ( - ExtendedObjectType, - ExtendedSWHID, - identifier_to_bytes, - snapshot_identifier, -) +from .hashutil import hash_to_bytes +from .identifiers import ExtendedObjectType, ExtendedSWHID, snapshot_identifier from .model import ( BaseContent, Content, @@ -413,7 +409,7 @@ else: break - return dict(id=identifier_to_bytes(id_), branches=branches) + return dict(id=hash_to_bytes(id_), branches=branches) def snapshots(*, min_size=0, max_size=100, only_objects=False): diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -5,8 +5,6 @@ from __future__ import annotations -import binascii -from functools import lru_cache from typing import Any, Dict from . import model @@ -29,75 +27,6 @@ RAW_EXTRINSIC_METADATA = "raw_extrinsic_metadata" -@lru_cache() -def identifier_to_bytes(identifier): - """Convert a text identifier to bytes. - - Args: - identifier: an identifier, either a 40-char hexadecimal string or a - bytes object of length 20 - Returns: - The length 20 bytestring corresponding to the given identifier - - Raises: - ValueError: if the identifier is of an unexpected type or length. - """ - - if isinstance(identifier, bytes): - if len(identifier) != 20: - raise ValueError( - "Wrong length for bytes identifier %s, expected 20" % len(identifier) - ) - return identifier - - if isinstance(identifier, str): - if len(identifier) != 40: - raise ValueError( - "Wrong length for str identifier %s, expected 40" % len(identifier) - ) - return bytes.fromhex(identifier) - - raise ValueError( - "Wrong type for identifier %s, expected bytes or str" - % identifier.__class__.__name__ - ) - - -@lru_cache() -def identifier_to_str(identifier): - """Convert an identifier to an hexadecimal string. - - Args: - identifier: an identifier, either a 40-char hexadecimal string or a - bytes object of length 20 - - Returns: - The length 40 string corresponding to the given identifier, hex encoded - - Raises: - ValueError: if the identifier is of an unexpected type or length. - """ - - if isinstance(identifier, str): - if len(identifier) != 40: - raise ValueError( - "Wrong length for str identifier %s, expected 40" % len(identifier) - ) - return identifier - - if isinstance(identifier, bytes): - if len(identifier) != 20: - raise ValueError( - "Wrong length for bytes identifier %s, expected 20" % len(identifier) - ) - return binascii.hexlify(identifier).decode() - - raise ValueError( - "Wrong type for identifier %s, expected bytes or str" - % identifier.__class__.__name__ - ) - - def content_identifier(content: Dict[str, Any]) -> Dict[str, bytes]: """Return the intrinsic identifier for a content. diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -3,7 +3,6 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import binascii import datetime import hashlib import itertools @@ -16,6 +15,7 @@ from swh.model import hashutil, identifiers from swh.model.exceptions import ValidationError from swh.model.hashutil import hash_to_bytes as _x +from swh.model.hashutil import hash_to_hex from swh.model.identifiers import ( SWHID_QUALIFIERS, CoreSWHID, @@ -35,43 +35,6 @@ return d -class UtilityFunctionsIdentifier(unittest.TestCase): - def setUp(self): - self.str_id = "c2e41aae41ac17bd4a650770d6ee77f62e52235b" - self.bytes_id = binascii.unhexlify(self.str_id) - self.bad_type_id = object() - - def test_identifier_to_bytes(self): - for id in [self.str_id, self.bytes_id]: - self.assertEqual(identifiers.identifier_to_bytes(id), self.bytes_id) - - # wrong length - with self.assertRaises(ValueError) as cm: - identifiers.identifier_to_bytes(id[:-2]) - - self.assertIn("length", str(cm.exception)) - - with self.assertRaises(ValueError) as cm: - identifiers.identifier_to_bytes(self.bad_type_id) - - self.assertIn("type", str(cm.exception)) - - def test_identifier_to_str(self): - for id in [self.str_id, self.bytes_id]: - self.assertEqual(identifiers.identifier_to_str(id), self.str_id) - - # wrong length - with self.assertRaises(ValueError) as cm: - identifiers.identifier_to_str(id[:-2]) - - self.assertIn("length", str(cm.exception)) - - with self.assertRaises(ValueError) as cm: - identifiers.identifier_to_str(self.bad_type_id) - - self.assertIn("type", str(cm.exception)) - - class UtilityFunctionsDateOffset(unittest.TestCase): def setUp(self): self.dates = { @@ -482,23 +445,23 @@ def test_revision_identifier(self): self.assertEqual( identifiers.revision_identifier(self.revision), - identifiers.identifier_to_str(self.revision["id"]), + hash_to_hex(self.revision["id"]), ) self.assertEqual( identifiers.revision_identifier(remove_id(self.revision)), - identifiers.identifier_to_str(self.revision["id"]), + hash_to_hex(self.revision["id"]), ) def test_revision_identifier_none_metadata(self): self.assertEqual( identifiers.revision_identifier(remove_id(self.revision_none_metadata)), - identifiers.identifier_to_str(self.revision_none_metadata["id"]), + hash_to_hex(self.revision_none_metadata["id"]), ) def test_revision_identifier_synthetic(self): self.assertEqual( identifiers.revision_identifier(remove_id(self.synthetic_revision)), - identifiers.identifier_to_str(self.synthetic_revision["id"]), + hash_to_hex(self.synthetic_revision["id"]), ) def test_revision_identifier_with_extra_headers(self): @@ -506,31 +469,31 @@ identifiers.revision_identifier( remove_id(self.revision_with_extra_headers) ), - identifiers.identifier_to_str(self.revision_with_extra_headers["id"]), + hash_to_hex(self.revision_with_extra_headers["id"]), ) def test_revision_identifier_with_gpgsig(self): self.assertEqual( identifiers.revision_identifier(remove_id(self.revision_with_gpgsig)), - identifiers.identifier_to_str(self.revision_with_gpgsig["id"]), + hash_to_hex(self.revision_with_gpgsig["id"]), ) def test_revision_identifier_no_message(self): self.assertEqual( identifiers.revision_identifier(remove_id(self.revision_no_message)), - identifiers.identifier_to_str(self.revision_no_message["id"]), + hash_to_hex(self.revision_no_message["id"]), ) def test_revision_identifier_empty_message(self): self.assertEqual( identifiers.revision_identifier(remove_id(self.revision_empty_message)), - identifiers.identifier_to_str(self.revision_empty_message["id"]), + hash_to_hex(self.revision_empty_message["id"]), ) def test_revision_identifier_only_fullname(self): self.assertEqual( identifiers.revision_identifier(remove_id(self.revision_only_fullname)), - identifiers.identifier_to_str(self.revision_only_fullname["id"]), + hash_to_hex(self.revision_only_fullname["id"]), ) @@ -648,47 +611,47 @@ def test_release_identifier(self): self.assertEqual( identifiers.release_identifier(self.release), - identifiers.identifier_to_str(self.release["id"]), + hash_to_hex(self.release["id"]), ) self.assertEqual( identifiers.release_identifier(remove_id(self.release)), - identifiers.identifier_to_str(self.release["id"]), + hash_to_hex(self.release["id"]), ) def test_release_identifier_no_author(self): self.assertEqual( identifiers.release_identifier(remove_id(self.release_no_author)), - identifiers.identifier_to_str(self.release_no_author["id"]), + hash_to_hex(self.release_no_author["id"]), ) def test_release_identifier_no_message(self): self.assertEqual( identifiers.release_identifier(remove_id(self.release_no_message)), - identifiers.identifier_to_str(self.release_no_message["id"]), + hash_to_hex(self.release_no_message["id"]), ) def test_release_identifier_empty_message(self): self.assertEqual( identifiers.release_identifier(remove_id(self.release_empty_message)), - identifiers.identifier_to_str(self.release_empty_message["id"]), + hash_to_hex(self.release_empty_message["id"]), ) def test_release_identifier_negative_utc(self): self.assertEqual( identifiers.release_identifier(remove_id(self.release_negative_utc)), - identifiers.identifier_to_str(self.release_negative_utc["id"]), + hash_to_hex(self.release_negative_utc["id"]), ) def test_release_identifier_newline_in_author(self): self.assertEqual( identifiers.release_identifier(remove_id(self.release_newline_in_author)), - identifiers.identifier_to_str(self.release_newline_in_author["id"]), + hash_to_hex(self.release_newline_in_author["id"]), ) def test_release_identifier_snapshot_target(self): self.assertEqual( identifiers.release_identifier(self.release_snapshot_target), - identifiers.identifier_to_str(self.release_snapshot_target["id"]), + hash_to_hex(self.release_snapshot_target["id"]), ) @@ -745,13 +708,13 @@ def test_empty_snapshot(self): self.assertEqual( identifiers.snapshot_identifier(remove_id(self.empty)), - identifiers.identifier_to_str(self.empty["id"]), + hash_to_hex(self.empty["id"]), ) def test_dangling_branch(self): self.assertEqual( identifiers.snapshot_identifier(remove_id(self.dangling_branch)), - identifiers.identifier_to_str(self.dangling_branch["id"]), + hash_to_hex(self.dangling_branch["id"]), ) def test_unresolved(self): @@ -761,7 +724,7 @@ def test_all_types(self): self.assertEqual( identifiers.snapshot_identifier(remove_id(self.all_types)), - identifiers.identifier_to_str(self.all_types["id"]), + hash_to_hex(self.all_types["id"]), )