diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -18,6 +18,7 @@ from_regex, integers, just, + lists, none, one_of, sampled_from, @@ -232,6 +233,12 @@ revision_metadata = metadata_dicts +def extra_headers(): + return lists( + tuples(binary(min_size=0, max_size=50), binary(min_size=0, max_size=500)) + ).map(tuple) + + def revisions_d(): return builds( dict, @@ -245,6 +252,7 @@ directory=sha1_git(), type=sampled_from([x.value for x in RevisionType]), metadata=optional(revision_metadata()), + extra_headers=extra_headers(), ) # TODO: metadata['extra_headers'] can have binary keys and values diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -417,7 +417,7 @@ - author_date - committer - committer_date - - metadata -> extra_headers + - extra_headers or metadata -> extra_headers - message A revision's identifier is the 'git'-checksum of a commit manifest @@ -477,22 +477,13 @@ ) # Handle extra headers - metadata = revision.get("metadata") - if not metadata: - metadata = {} + metadata = revision.get("metadata") or {} + extra_headers = revision.get("extra_headers", ()) + if not extra_headers and "extra_headers" in metadata: + extra_headers = metadata["extra_headers"] - for key, value in metadata.get("extra_headers", []): - - # Integer values: decimal representation - if isinstance(value, int): - value = str(value).encode("utf-8") - - # Unicode string values: utf-8 encoding - if isinstance(value, str): - value = value.encode("utf-8") - - # encode the key to utf-8 - components.extend([key.encode("utf-8"), b" ", escape_newlines(value), b"\n"]) + for key, value in extra_headers: + components.extend([key, b" ", escape_newlines(value), b"\n"]) if revision["message"] is not None: components.extend([b"\n", revision["message"]]) diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -6,9 +6,10 @@ import datetime from abc import ABCMeta, abstractmethod +from copy import deepcopy from enum import Enum from hashlib import sha256 -from typing import Dict, Optional, Tuple, TypeVar, Union +from typing import Dict, Iterable, Optional, Tuple, TypeVar, Union from typing_extensions import Final import attr @@ -416,6 +417,10 @@ MERCURIAL = "hg" +def tuplify_extra_headers(value: Iterable) -> Tuple: + return tuple((k, v) for k, v in value) + + @attr.s(frozen=True) class Revision(BaseModel, HashableObject): object_type: Final = "revision" @@ -435,6 +440,26 @@ ) parents = attr.ib(type=Tuple[Sha1Git, ...], validator=type_validator(), default=()) id = attr.ib(type=Sha1Git, validator=type_validator(), default=b"") + extra_headers = attr.ib( + type=Tuple[Tuple[bytes, bytes], ...], # but it makes mypy sad + validator=type_validator(), + converter=tuplify_extra_headers, # type: ignore + default=(), + ) + + def __attrs_post_init__(self): + super().__attrs_post_init__() + # ensure metadata is a deep copy of whatever was given, and if needed + # extract extra_headers from there + if self.metadata: + metadata = deepcopy(self.metadata) + if not self.extra_headers and "extra_headers" in metadata: + object.__setattr__( + self, + "extra_headers", + tuplify_extra_headers(metadata.pop("extra_headers")), + ) + object.__setattr__(self, "metadata", metadata) @staticmethod def compute_hash(object_dict): diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -370,12 +370,10 @@ 2015, 7, 12, 15, 10, 30, tzinfo=linus_tz ), "message": b"Linux 4.2-rc2\n", - "metadata": { - "extra_headers": [ - ["svn-repo-uuid", "046f1af7-66c2-d61b-5410-ce57b7db7bff"], - ["svn-revision", 10], - ] - }, + "extra_headers": ( + (b"svn-repo-uuid", b"046f1af7-66c2-d61b-5410-ce57b7db7bff"), + (b"svn-revision", b"10"), + ), } self.revision_with_gpgsig = { @@ -393,7 +391,7 @@ "date": {"timestamp": 1428538899, "offset": 480,}, "committer": {"name": b"Jiang Xin", "email": b"worldhello.net@gmail.com",}, "committer_date": {"timestamp": 1428538899, "offset": 480,}, - "metadata": {"extra_headers": [["gpgsig", gpgsig],],}, + "extra_headers": ((b"gpgsig", gpgsig),), "message": b"""Merge branch 'master' of git://github.com/alexhenrie/git-po * 'master' of git://github.com/alexhenrie/git-po: @@ -450,12 +448,10 @@ 2015, 7, 12, 15, 10, 30, tzinfo=linus_tz ), "message": b"Linux 4.2-rc2\n", - "metadata": { - "extra_headers": [ - ["svn-repo-uuid", "046f1af7-66c2-d61b-5410-ce57b7db7bff"], - ["svn-revision", 10], - ] - }, + "extra_headers": ( + (b"svn-repo-uuid", b"046f1af7-66c2-d61b-5410-ce57b7db7bff"), + (b"svn-revision", b"10"), + ), } def test_revision_identifier(self): diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -415,6 +415,82 @@ SkippedContent.from_dict(skipped_content_d) +# Revision + + +def test_revision_extra_headers(): + rev_dict = revision_example.copy() + rev_dict.pop("id") + rev = Revision.from_dict(rev_dict) + rev_dict = attr.asdict(rev, recurse=False) + + rev_model = Revision(**rev_dict) + assert rev_model.metadata is None + assert rev_model.extra_headers == () + + rev_dict["metadata"] = { + "something": "somewhere", + "some other thing": "stranger", + } + rev_model = Revision(**rev_dict) + assert rev_model.metadata == rev_dict["metadata"] + assert rev_model.extra_headers == () + + extra_headers = ( + ("header1", "value1"), + ("header2", 42), + ("header3", b"should I?\u0000"), + ("header1", "again"), + ) + + # check the bw-compat init hook does the job + rev_dict["metadata"]["extra_headers"] = extra_headers + rev_model = Revision(**rev_dict) + assert "extra_headers" not in rev_model.metadata + assert rev_model.extra_headers == extra_headers + + # check Revision.extra_headers converter does the job + rev_dict["metadata"]["extra_headers"] = [list(x) for x in extra_headers] + rev_model = Revision(**rev_dict) + assert "extra_headers" not in rev_model.metadata + assert rev_model.extra_headers == extra_headers + + +def test_revision_extra_headers_from_dict(): + rev_dict = revision_example.copy() + rev_dict.pop("id") + rev_model = Revision.from_dict(rev_dict) + assert rev_model.metadata is None + assert rev_model.extra_headers == () + + rev_dict["metadata"] = { + "something": "somewhere", + "some other thing": "stranger", + } + rev_model = Revision.from_dict(rev_dict) + assert rev_model.metadata == rev_dict["metadata"] + assert rev_model.extra_headers == () + + extra_headers = ( + (b"header1", b"value1"), + (b"header2", b"42"), + (b"header3", b"should I?\nmaybe\x00\xff"), + (b"header1", b"again"), + ) + + # check the bw-compat init hook does the job + rev_dict["metadata"]["extra_headers"] = extra_headers + rev_model = Revision.from_dict(rev_dict) + assert "extra_headers" not in rev_model.metadata + assert rev_model.extra_headers == extra_headers + + # check Revision.extra_headers converter does the job + rev_dict["metadata"]["extra_headers"] = [list(x) for x in extra_headers] + rev_model = Revision.from_dict(rev_dict) + assert "extra_headers" not in rev_model.metadata + assert rev_model.extra_headers == extra_headers + + # ID computation