@classmethod
def from_address(cls, address: Union[str, bytes]) -> 'Person':
    """Build a person from an address such as ``name <email>``.

    The address is split on its first ``<``: what precedes it is the
    name (minus one trailing space, if any) and what follows, up to
    the first ``>`` if there is one, is the email. Without any ``<``
    the whole address is the name and there is no email. Empty name
    or email parts are stored as ``None``.

    Args:
        address: the address to parse; a ``str`` is encoded to UTF-8
            before parsing.

    Returns:
        An instance of ``cls`` whose ``fullname`` is the address as
        bytes, unmodified.

    Raises:
        TypeError: if ``address`` is ``None``.

    """
    if address is None:
        raise TypeError('address is None.')

    if not isinstance(address, bytes):
        address = address.encode('utf8')

    raw_name, bracket, remainder = address.partition(b'<')
    if not bracket:
        # No '<' at all: the whole input is the name.
        name, email = address, None
    else:
        # Drop only the single space separating the name from '<'.
        name = raw_name[:-1] if raw_name.endswith(b' ') else raw_name
        # A missing '>' is tolerated: everything after '<' is the email.
        email = remainder.partition(b'>')[0]

    # Build through cls (not a hard-coded class) so that subclasses
    # calling this constructor get instances of their own type.
    return cls(
        name=name or None,
        email=email or None,
        fullname=address,
    )
def test_git_author_line_to_author():
    """Check ``Person.from_address`` on regular and malformed addresses.

    Each key of ``tests`` is a raw address; its value is the ``Person``
    that parsing this address is expected to produce.

    """
    # edge case out of the way
    with pytest.raises(TypeError):
        Person.from_address(None)

    tests = {
        b'a <b@c.com>': Person(
            name=b'a',
            email=b'b@c.com',
            fullname=b'a <b@c.com>',
        ),
        b'<foo@bar.com>': Person(
            name=None,
            email=b'foo@bar.com',
            fullname=b'<foo@bar.com>',
        ),
        # no closing bracket: the rest of the line is the email
        b'malformed <email': Person(
            name=b'malformed',
            email=b'email',
            fullname=b'malformed <email',
        ),
        # anything after the closing bracket is ignored
        b'trailing <sp@c.e> ': Person(
            name=b'trailing',
            email=b'sp@c.e',
            fullname=b'trailing <sp@c.e> ',
        ),
        # no space between the name and the opening bracket
        b'no<sp@c.e>': Person(
            name=b'no',
            email=b'sp@c.e',
            fullname=b'no<sp@c.e>',
        ),
        # empty name and empty email both collapse to None
        b' <>': Person(
            name=None,
            email=None,
            fullname=b' <>',
        ),
    }

    for address, expected_person in tests.items():
        assert expected_person == Person.from_address(address)