diff --git a/swh/model/model.py b/swh/model/model.py
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -17,7 +17,7 @@
     normalize_timestamp, directory_identifier, revision_identifier,
     release_identifier, snapshot_identifier
 )
-from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes
+from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash
 
 
 class MissingData(Exception):
@@ -390,6 +390,15 @@
         type=str,
         validator=attr.validators.in_(['visible', 'hidden', 'absent']))
 
+    @staticmethod
+    def hash_data(data: bytes):
+        """Hash some data, returning most of the fields of a content object"""
+        d = MultiHash.from_data(data).digest()  # field name -> digest
+        d['data'] = data  # keep the raw bytes next to the digests
+        d['length'] = len(data)  # number of bytes
+
+        return d
+
     def to_dict(self):
         content = super().to_dict()
         if content['ctime'] is None:
@@ -448,6 +457,12 @@
             del content['data']
         return content
 
+    @classmethod  # alternate constructor: build a Content from raw bytes
+    def from_data(cls, data, status='visible') -> 'Content':
+        d = cls.hash_data(data)  # digests plus 'data' and 'length'
+        d['status'] = status  # caller-chosen; defaults to 'visible'
+        return cls(**d)  # dict keys become constructor keyword arguments
+
     @classmethod
     def from_dict(cls, d):
         return super().from_dict(d, use_subclass=False)
@@ -503,6 +518,14 @@
             del content['origin']
         return content
 
+    @classmethod  # alternate constructor: SkippedContent from raw bytes
+    def from_data(cls, data, reason: str) -> 'SkippedContent':
+        d = cls.hash_data(data)  # digests plus 'data' and 'length'
+        del d['data']  # raw bytes are not passed to the constructor
+        d['status'] = 'absent'  # fixed for this constructor
+        d['reason'] = reason  # caller-supplied reason string
+        return cls(**d)  # dict keys become constructor keyword arguments
+
     @classmethod
     def from_dict(cls, d):
         d2 = d
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -7,14 +7,15 @@
 import datetime
 
 from hypothesis import given
+from hypothesis.strategies import binary
 import pytest
 
 from swh.model.model import (
-    Content, Directory, Revision, Release, Snapshot,
+    Content, SkippedContent, Directory, Revision, Release, Snapshot,
     Timestamp, TimestampWithTimezone,
     MissingData,
 )
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, MultiHash
 from swh.model.hypothesis_strategies import objects, origins, origin_visits
 from swh.model.identifiers import (
     directory_identifier, revision_identifier, release_identifier,
@@ -137,6 +138,36 @@
         c.with_data()
 
 
+@given(binary(max_size=4096))  # arbitrary bytes, at most 4096 long
+def test_content_from_data(data):
+    c = Content.from_data(data)  # status left at its default
+    assert c.data == data
+    assert c.length == len(data)
+    assert c.status == 'visible'
+    for key, value in MultiHash.from_data(data).digest().items():
+        assert getattr(c, key) == value  # digest exposed as attribute
+
+
+@given(binary(max_size=4096))  # arbitrary bytes, at most 4096 long
+def test_hidden_content_from_data(data):
+    c = Content.from_data(data, status='hidden')  # override the default
+    assert c.data == data
+    assert c.length == len(data)
+    assert c.status == 'hidden'
+    for key, value in MultiHash.from_data(data).digest().items():
+        assert getattr(c, key) == value  # digest exposed as attribute
+
+
+@given(binary(max_size=4096))  # arbitrary bytes, at most 4096 long
+def test_skipped_content_from_data(data):
+    c = SkippedContent.from_data(data, reason='reason')
+    assert c.reason == 'reason'
+    assert c.length == len(data)
+    assert c.status == 'absent'  # set by from_data itself, not the caller
+    for key, value in MultiHash.from_data(data).digest().items():
+        assert getattr(c, key) == value  # digest exposed as attribute
+
+
 def test_directory_model_id_computation():
     dir_dict = dict(directory_example)
     del dir_dict['id']