diff --git a/mypy.ini b/mypy.ini --- a/mypy.ini +++ b/mypy.ini @@ -11,6 +11,9 @@ [mypy-dulwich.*] ignore_missing_imports = True +[mypy-iso8601.*] +ignore_missing_imports = True + [mypy-pkg_resources.*] ignore_missing_imports = True diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ attrs hypothesis python-dateutil +iso8601 diff --git a/swh/model/merkle.py b/swh/model/merkle.py --- a/swh/model/merkle.py +++ b/swh/model/merkle.py @@ -8,7 +8,7 @@ import abc import collections -from typing import List, Optional +from typing import Iterator, List, Optional, Set def deep_update(left, right): @@ -120,6 +120,13 @@ self.__hash = None self.collected = False + def __eq__(self, other): + return isinstance(other, MerkleNode) \ + and super().__eq__(other) and self.data == other.data + + def __ne__(self, other): + return not self.__eq__(other) + def invalidate_hash(self): """Invalidate the cached hash of the current node.""" if not self.__hash: @@ -266,6 +273,20 @@ for child in self.values(): child.reset_collect() + def iter_tree(self) -> Iterator['MerkleNode']: + """Yields this node and all of its descendants, recursively. Common + nodes are deduplicated. + """ + yield from self._iter_tree(set()) + + def _iter_tree( + self, seen: Set[bytes]) -> Iterator['MerkleNode']: + if self.hash not in seen: + seen.add(self.hash) + yield self + for child in self.values(): + yield from child._iter_tree(seen=seen) + class MerkleLeaf(MerkleNode): """A leaf to a Merkle tree. 
diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -7,10 +7,11 @@ from abc import ABCMeta, abstractmethod from enum import Enum -from typing import List, Optional, Dict +from typing import List, Optional, Dict, Union import attr import dateutil.parser +import iso8601 from .identifiers import ( normalize_timestamp, directory_identifier, revision_identifier, @@ -124,15 +125,31 @@ raise ValueError('offset too large: %d minutes' % value) @classmethod - def from_dict(cls, d): + def from_dict(cls, obj: Union[Dict, datetime.datetime, int]): """Builds a TimestampWithTimezone from any of the formats accepted by :func:`swh.model.normalize_timestamp`.""" - d = normalize_timestamp(d) + # TODO: this accepts way more types than just dicts; find a better + # name + d = normalize_timestamp(obj) return cls( timestamp=Timestamp.from_dict(d['timestamp']), offset=d['offset'], negative_utc=d['negative_utc']) + @classmethod + def from_datetime(cls, dt: datetime.datetime): + return cls.from_dict(dt) + + @classmethod + def from_iso8601(cls, s): + """Builds a TimestampWithTimezone from an ISO8601-formatted string. 
+ """ + dt = iso8601.parse_date(s) + tstz = cls.from_datetime(dt) + if dt.tzname() == '-00:00': + tstz = attr.evolve(tstz, negative_utc=True) + return tstz + @attr.s(frozen=True) class Origin(BaseModel): diff --git a/swh/model/tests/test_merkle.py b/swh/model/tests/test_merkle.py --- a/swh/model/tests/test_merkle.py +++ b/swh/model/tests/test_merkle.py @@ -46,6 +46,14 @@ self.data = {'value': b'value'} self.instance = MerkleTestLeaf(self.data) + def test_equality(self): + leaf1 = MerkleTestLeaf(self.data) + leaf2 = MerkleTestLeaf(self.data) + leaf3 = MerkleTestLeaf({}) + + self.assertEqual(leaf1, leaf2) + self.assertNotEqual(leaf1, leaf3) + def test_hash(self): self.assertEqual(self.instance.compute_hash_called, 0) instance_hash = self.instance.hash @@ -114,6 +122,20 @@ node2[j] = node3 self.nodes[value3] = node3 + def test_equality(self): + node1 = merkle.MerkleNode({'foo': b'bar'}) + node2 = merkle.MerkleNode({'foo': b'bar'}) + node3 = merkle.MerkleNode({}) + + self.assertEqual(node1, node2) + self.assertNotEqual(node1, node3, node1 == node3) + + node1['foo'] = node3 + self.assertNotEqual(node1, node2) + + node2['foo'] = node3 + self.assertEqual(node1, node2) + def test_hash(self): for node in self.nodes.values(): self.assertEqual(node.compute_hash_called, 0) @@ -162,6 +184,10 @@ collected2 = self.root.collect() self.assertEqual(collected2, {}) + def test_iter_tree(self): + nodes = list(self.root.iter_tree()) + self.assertCountEqual(nodes, self.nodes.values()) + def test_get(self): for key in (b'a', b'b', b'c'): self.assertEqual(self.root[key], self.nodes[b'root/' + key]) diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -4,12 +4,16 @@ # See top-level LICENSE file for more information import copy +import datetime from hypothesis import given import pytest -from swh.model.model import Content, Directory, Revision, Release, Snapshot -from swh.model.model import 
MissingData +from swh.model.model import ( + Content, Directory, Revision, Release, Snapshot, + Timestamp, TimestampWithTimezone, + MissingData, +) from swh.model.hashutil import hash_to_bytes from swh.model.hypothesis_strategies import objects, origins, origin_visits from swh.model.identifiers import ( @@ -56,6 +60,53 @@ assert origin_visit == type(origin_visit).from_dict(obj) +def test_timestampwithtimezone_from_datetime(): + tz = datetime.timezone(datetime.timedelta(minutes=+60)) + date = datetime.datetime( + 2020, 2, 27, 14, 39, 19, tzinfo=tz) + + tstz = TimestampWithTimezone.from_datetime(date) + + assert tstz == TimestampWithTimezone( + timestamp=Timestamp( + seconds=1582810759, + microseconds=0, + ), + offset=60, + negative_utc=False, + ) + + +def test_timestampwithtimezone_from_iso8601(): + date = '2020-02-27 14:39:19.123456+0100' + + tstz = TimestampWithTimezone.from_iso8601(date) + + assert tstz == TimestampWithTimezone( + timestamp=Timestamp( + seconds=1582810759, + microseconds=123456, + ), + offset=60, + negative_utc=False, + ) + + +def test_timestampwithtimezone_from_iso8601_negative_utc(): + date = '2020-02-27 13:39:19-0000' + + tstz = TimestampWithTimezone.from_iso8601(date) + + assert tstz == TimestampWithTimezone( + timestamp=Timestamp( + seconds=1582810759, + microseconds=0, + ), + offset=0, + negative_utc=True, + ) + + def test_content_get_hash(): hashes = dict( sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')