diff --git a/swh/loader/git/converters.py b/swh/loader/git/converters.py
--- a/swh/loader/git/converters.py
+++ b/swh/loader/git/converters.py
@@ -5,26 +5,25 @@
 
 """Convert dulwich objects to dictionaries suitable for swh.storage"""
 
-from swh.model import identifiers
+from typing import Any, Dict, Optional
+
 from swh.model.hashutil import (
     DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash
 )
+from swh.model.model import (
+    BaseContent, Content, Directory, DirectoryEntry,
+    ObjectType, Person, Release, Revision, RevisionType,
+    SkippedContent, TargetType, Timestamp, TimestampWithTimezone,
+)
 
 HASH_ALGORITHMS = DEFAULT_ALGORITHMS - {'sha1_git'}
 
 
-def origin_url_to_origin(origin_url):
-    """Format a pygit2.Repository as an origin suitable for swh.storage"""
-    return {
-        'url': origin_url,
-    }
-
-
-def dulwich_blob_to_content_id(blob):
+def dulwich_blob_to_content_id(blob) -> Dict[str, Any]:
    """Convert a dulwich blob to a Software Heritage content id"""
     if blob.type_name != b'blob':
-        return
+        raise ValueError('Argument is not a blob.')
 
     size = blob.raw_length()
     data = blob.as_raw_string()
@@ -34,29 +33,33 @@
     return hashes
 
 
-def dulwich_blob_to_content(blob):
+def dulwich_blob_to_content(blob, max_content_size=None) -> BaseContent:
     """Convert a dulwich blob to a Software Heritage content
 
     """
     if blob.type_name != b'blob':
-        return
-    ret = dulwich_blob_to_content_id(blob)
-    data = blob.as_raw_string()
-    ret['data'] = data
-    ret['status'] = 'visible'
-    return ret
+        raise ValueError('Argument is not a blob.')
+    hashes = dulwich_blob_to_content_id(blob)
+    if max_content_size is not None and hashes['length'] >= max_content_size:
+        return SkippedContent(
+            status='absent',
+            reason='Content too large',
+            **hashes,
+        )
+    else:
+        return Content(
+            data=blob.as_raw_string(),
+            status='visible',
+            **hashes,
+        )
 
 
-def dulwich_tree_to_directory(tree, log=None):
+def dulwich_tree_to_directory(tree, log=None) -> Directory:
     """Format a tree as a directory"""
     if tree.type_name != b'tree':
-        return
+        raise ValueError('Argument is not a tree.')
 
-    ret = {
-        'id': tree.sha().digest(),
-    }
     entries = []
-    ret['entries'] = entries
 
     entry_mode_map = {
         0o040000: 'dir',
@@ -67,21 +70,23 @@
     }
 
     for entry in tree.iteritems():
-        entries.append({
-            'type': entry_mode_map.get(entry.mode, 'file'),
-            'perms': entry.mode,
-            'name': entry.path,
-            'target': hash_to_bytes(entry.sha.decode('ascii')),
-        })
+        entries.append(DirectoryEntry(
+            type=entry_mode_map.get(entry.mode, 'file'),
+            perms=entry.mode,
+            name=entry.path,
+            target=hash_to_bytes(entry.sha.decode('ascii')),
+        ))
 
-    return ret
+    return Directory(
+        id=tree.sha().digest(),
+        entries=entries,
+    )
 
 
-def parse_author(name_email):
+def parse_author(name_email: bytes) -> Person:
     """Parse an author line"""
 
     if name_email is None:
-        return None
+        raise ValueError('fullname is None')
 
     try:
         open_bracket = name_email.index(b'<')
@@ -105,48 +110,30 @@
     else:
         email = raw_email[:close_bracket]
 
-    return {
-        'name': name,
-        'email': email,
-        'fullname': name_email,
-    }
+    return Person(
+        name=name,
+        email=email,
+        fullname=name_email,
+    )
 
 
-def dulwich_tsinfo_to_timestamp(timestamp, timezone, timezone_neg_utc):
+def dulwich_tsinfo_to_timestamp(
+        timestamp, timezone, timezone_neg_utc) -> TimestampWithTimezone:
     """Convert the dulwich timestamp information to a structure compatible with
     Software Heritage"""
-    return {
-        'timestamp': timestamp,
-        'offset': timezone // 60,
-        'negative_utc': timezone_neg_utc if timezone == 0 else None,
-    }
+    return TimestampWithTimezone(
+        timestamp=Timestamp(
+            seconds=timestamp,
+            microseconds=0,
+        ),
+        offset=timezone // 60,
+        negative_utc=timezone_neg_utc if timezone == 0 else None,
+    )
 
 
-def dulwich_commit_to_revision(commit, log=None):
+def dulwich_commit_to_revision(commit, log=None) -> Revision:
     if commit.type_name != b'commit':
-        return
-
-    ret = {
-        'id': commit.sha().digest(),
-        'author': parse_author(commit.author),
-        'date': dulwich_tsinfo_to_timestamp(
-            commit.author_time,
-            commit.author_timezone,
-            commit._author_timezone_neg_utc,
-        ),
-        'committer': parse_author(commit.committer),
-        'committer_date': dulwich_tsinfo_to_timestamp(
-            commit.commit_time,
-            commit.commit_timezone,
-            commit._commit_timezone_neg_utc,
-        ),
-        'type': 'git',
-        'directory': bytes.fromhex(commit.tree.decode()),
-        'message': commit.message,
-        'metadata': None,
-        'synthetic': False,
-        'parents': [bytes.fromhex(p.decode()) for p in commit.parents],
-    }
+        raise ValueError('Argument is not a commit.')
 
     git_metadata = []
     if commit.encoding is not None:
@@ -164,54 +151,77 @@
         git_metadata.append(['gpgsig', commit.gpgsig])
 
     if git_metadata:
-        ret['metadata'] = {
+        metadata: Optional[Dict[str, Any]] = {
             'extra_headers': git_metadata,
         }
+    else:
+        metadata = None
 
-    return ret
+    return Revision(
+        id=commit.sha().digest(),
+        author=parse_author(commit.author),
+        date=dulwich_tsinfo_to_timestamp(
+            commit.author_time,
+            commit.author_timezone,
+            commit._author_timezone_neg_utc,
+        ),
+        committer=parse_author(commit.committer),
+        committer_date=dulwich_tsinfo_to_timestamp(
+            commit.commit_time,
+            commit.commit_timezone,
+            commit._commit_timezone_neg_utc,
+        ),
+        type=RevisionType.GIT,
+        directory=bytes.fromhex(commit.tree.decode()),
+        message=commit.message,
+        metadata=metadata,
+        synthetic=False,
+        parents=[bytes.fromhex(p.decode()) for p in commit.parents],
+    )
+
+
+DULWICH_TARGET_TYPES = {
+    b'blob': TargetType.CONTENT,
+    b'tree': TargetType.DIRECTORY,
+    b'commit': TargetType.REVISION,
+    b'tag': TargetType.RELEASE,
+}
 
 
-DULWICH_TYPES = {
-    b'blob': 'content',
-    b'tree': 'directory',
-    b'commit': 'revision',
-    b'tag': 'release',
+DULWICH_OBJECT_TYPES = {
+    b'blob': ObjectType.CONTENT,
+    b'tree': ObjectType.DIRECTORY,
+    b'commit': ObjectType.REVISION,
+    b'tag': ObjectType.RELEASE,
 }
 
 
-def dulwich_tag_to_release(tag, log=None):
+def dulwich_tag_to_release(tag, log=None) -> Release:
     if tag.type_name != b'tag':
-        return
+        raise ValueError('Argument is not a tag.')
 
     target_type, target = tag.object
-    ret = {
-        'id': tag.sha().digest(),
-        'name': tag.name,
-        'target': bytes.fromhex(target.decode()),
-        'target_type': DULWICH_TYPES[target_type.type_name],
-        'message': tag._message,
-        'metadata': None,
-        'synthetic': False,
-    }
     if tag.tagger:
-        ret['author'] = parse_author(tag.tagger)
+        author: Optional[Person] = parse_author(tag.tagger)
         if not tag.tag_time:
-            ret['date'] = None
+            date = None
         else:
-            ret['date'] = dulwich_tsinfo_to_timestamp(
+            date = dulwich_tsinfo_to_timestamp(
                 tag.tag_time,
                 tag.tag_timezone,
                 tag._tag_timezone_neg_utc,
             )
     else:
-        ret['author'] = ret['date'] = None
-
-    return ret
-
-
-def branches_to_snapshot(branches):
-    snapshot = {'branches': branches}
-    snapshot_id = identifiers.snapshot_identifier(snapshot)
-    snapshot['id'] = identifiers.identifier_to_bytes(snapshot_id)
-
-    return snapshot
+        author = date = None
+
+    return Release(
+        id=tag.sha().digest(),
+        author=author,
+        date=date,
+        name=tag.name,
+        target=bytes.fromhex(target.decode()),
+        target_type=DULWICH_OBJECT_TYPES[target_type.type_name],
+        message=tag._message,
+        metadata=None,
+        synthetic=False,
+    )
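Note on the API change above: the converters now build attrs-based swh.model.model objects instead of plain dicts; callers that still need the dict form can use the objects' to_dict() method. A minimal sketch of the new calling convention (the repository path below is illustrative, not part of this patch):

    import dulwich.repo

    from swh.loader.git import converters

    repo = dulwich.repo.Repo('/tmp/example-repo')  # hypothetical local clone
    commit = repo[repo.head()]                     # dulwich Commit for HEAD
    revision = converters.dulwich_commit_to_revision(commit)
    assert revision.id == commit.sha().digest()    # raw git sha1, as before
    revision_dict = revision.to_dict()             # dict form for storage APIs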
diff --git a/swh/loader/git/from_disk.py b/swh/loader/git/from_disk.py --- a/swh/loader/git/from_disk.py +++ b/swh/loader/git/from_disk.py @@ -7,12 +7,16 @@ import dulwich.repo import os import shutil +from typing import Dict, Optional from dulwich.errors import ObjectFormatException, EmptyFileException from collections import defaultdict from swh.model import hashutil +from swh.model.model import ( + Origin, Snapshot, SnapshotBranch, TargetType) from swh.loader.core.loader import DVCSLoader + from . import converters, utils @@ -32,7 +36,7 @@ self.directory = directory def prepare_origin_visit(self, *args, **kwargs): - self.origin = converters.origin_url_to_origin(self.origin_url) + self.origin = Origin(url=self.origin_url) def prepare(self, *args, **kwargs): self.repo = dulwich.repo.Repo(self.directory) @@ -97,7 +101,7 @@ extra={ 'swh_type': 'swh_loader_git_missing_object', 'swh_object_id': _id, - 'origin_url': self.origin['url'], + 'origin_url': self.origin.url, }) return None except ObjectFormatException: @@ -106,7 +110,7 @@ extra={ 'swh_type': 'swh_loader_git_missing_object', 'swh_object_id': _id, - 'origin_url': self.origin['url'], + 'origin_url': self.origin.url, }) return None except EmptyFileException: @@ -115,7 +119,7 @@ extra={ 'swh_type': 'swh_loader_git_missing_object', 'swh_object_id': _id, - 'origin_url': self.origin['url'], + 'origin_url': self.origin.url, }) else: return obj @@ -123,7 +127,7 @@ def fetch_data(self): """Fetch the data from the data source""" previous_visit = self.storage.origin_visit_get_latest( - self.origin['url'], require_snapshot=True) + self.origin.url, require_snapshot=True) if previous_visit: self.previous_snapshot_id = previous_visit['snapshot'] else: @@ -213,25 +217,26 @@ def get_snapshot(self): """Turn the list of branches into a snapshot to load""" - branches = {} + branches: Dict[bytes, Optional[SnapshotBranch]] = {} for ref, target in self.repo.refs.as_dict().items(): obj = self.get_object(target) if obj: - branches[ref] = { - 'target': hashutil.bytehex_to_hash(target), - 'target_type': converters.DULWICH_TYPES[obj.type_name], - } + target_type = converters.DULWICH_TARGET_TYPES[obj.type_name] + branches[ref] = SnapshotBranch( + target=hashutil.bytehex_to_hash(target), + target_type=target_type, + ) else: branches[ref] = None for ref, target in self.repo.refs.get_symrefs().items(): - branches[ref] = { - 'target': target, - 'target_type': 'alias', - } + branches[ref] = SnapshotBranch( + target=target, + target_type=TargetType.ALIAS, + ) - self.snapshot = converters.branches_to_snapshot(branches) + self.snapshot = Snapshot(branches=branches) return self.snapshot def get_fetch_history_result(self): @@ -253,9 +258,9 @@ eventful = False if self.previous_snapshot_id: - eventful = self.snapshot['id'] != self.previous_snapshot_id + eventful = self.snapshot.id != self.previous_snapshot_id else: - eventful = bool(self.snapshot['branches']) + eventful = bool(self.snapshot.branches) return {'status': ('eventful' if eventful else 'uneventful')} diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py --- a/swh/loader/git/loader.py +++ b/swh/loader/git/loader.py @@ -3,21 +3,26 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from collections import defaultdict import datetime -import dulwich.client +from io import BytesIO import logging import os import pickle import sys +from typing import Any, Dict, Iterable, Optional -from collections import defaultdict -from io import 
BytesIO
+import dulwich.client
 from dulwich.object_store import ObjectStoreGraphWalker
 from dulwich.pack import PackData, PackInflater
 
 from swh.model import hashutil
+from swh.model.model import (
+    BaseContent, Directory, Origin, Revision,
+    Release, Snapshot, SnapshotBranch, TargetType, Sha1Git)
 from swh.loader.core.loader import DVCSLoader
 from swh.storage.algos.snapshot import snapshot_get_all_branches
+
 from . import converters
 
@@ -53,15 +58,15 @@
 
     def _cache_heads(self, base_snapshot):
         """Return all the known head commits for the given snapshot"""
-        _git_types = ['content', 'directory', 'revision', 'release']
+        _git_types = list(converters.DULWICH_TARGET_TYPES.values())
 
         if not base_snapshot:
             return []
 
         snapshot_targets = set()
-        for target in base_snapshot['branches'].values():
-            if target and target['target_type'] in _git_types:
-                snapshot_targets.add(target['target'])
+        for branch in base_snapshot.branches.values():
+            if branch and branch.target_type in _git_types:
+                snapshot_targets.add(branch.target)
 
         decoded_targets = self._decode_from_storage(snapshot_targets)
 
@@ -160,9 +165,14 @@
         )
         return ret
 
-    def find_remote_ref_types_in_swh(self, remote_refs):
+    def find_remote_ref_types_in_swh(
+            self, remote_refs) -> Dict[bytes, Dict[str, Any]]:
         """Parse the remote refs information and list the objects that exist in
         Software Heritage.
+
+        Returns:
+            dict whose keys are branch names, and values are dicts with
+            `target` and `target_type` keys.
         """
 
         all_objs = set(remote_refs.values()) - set(self._type_cache)
 
@@ -279,19 +289,22 @@
 
     def prepare_origin_visit(self, *args, **kwargs):
         self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc)
-        self.origin = converters.origin_url_to_origin(self.origin_url)
+        self.origin = Origin(url=self.origin_url)
 
-    def get_full_snapshot(self, origin_url):
+    def get_full_snapshot(self, origin_url) -> Optional[Snapshot]:
         visit = self.storage.origin_visit_get_latest(
             origin_url, require_snapshot=True)
         if visit and visit['snapshot']:
-            return snapshot_get_all_branches(
+            snapshot = snapshot_get_all_branches(
                 self.storage, visit['snapshot'])
         else:
+            snapshot = None
+        if snapshot is None:
             return None
+        return Snapshot.from_dict(snapshot)
 
     def prepare(self, *args, **kwargs):
-        base_origin_url = origin_url = self.origin['url']
+        base_origin_url = origin_url = self.origin.url
 
         prev_snapshot = None
 
@@ -299,7 +312,7 @@
             prev_snapshot = self.get_full_snapshot(origin_url)
 
         if self.base_url and not prev_snapshot:
-            base_origin = converters.origin_url_to_origin(self.base_url)
+            base_origin = Origin(url=self.base_url)
             base_origin = self.storage.origin_get(base_origin)
             if base_origin:
                 base_origin_url = base_origin['url']
@@ -314,7 +327,7 @@
             sys.stderr.flush()
 
         fetch_info = self.fetch_pack_from_origin(
-            self.origin['url'], self.base_snapshot,
+            self.origin.url, self.base_snapshot,
             do_progress)
 
         self.pack_buffer = fetch_info['pack_buffer']
@@ -324,7 +337,7 @@
         self.local_refs = fetch_info['local_refs']
         self.symbolic_refs = fetch_info['symbolic_refs']
 
-        origin_url = self.origin['url']
+        origin_url = self.origin.url
 
         self.log.info('Listed %d refs for repo %s' % (
             len(self.remote_refs), origin_url), extra={
@@ -371,7 +384,7 @@
     def has_contents(self):
         return bool(self.type_to_ids[b'blob'])
 
-    def get_content_ids(self):
+    def get_content_ids(self) -> Iterable[Dict[str, Any]]:
         """Get the content identifiers from the git repository"""
         for raw_obj in self.get_inflater():
             if raw_obj.type_name != b'blob':
@@ -379,7 +392,7 @@
 
             yield converters.dulwich_blob_to_content_id(raw_obj)
 
-    def get_contents(self):
+    def get_contents(self) -> Iterable[BaseContent]:
         """Format the blobs from the git repository as swh contents"""
         missing_contents = set(self.storage.content_missing(
             self.get_content_ids(), 'sha1_git'))
@@ -391,17 +404,18 @@
             if raw_obj.sha().digest() not in missing_contents:
                 continue
 
-            yield converters.dulwich_blob_to_content(raw_obj)
+            yield converters.dulwich_blob_to_content(
+                raw_obj, max_content_size=self.max_content_size)
 
-    def has_directories(self):
+    def has_directories(self) -> bool:
         return bool(self.type_to_ids[b'tree'])
 
-    def get_directory_ids(self):
+    def get_directory_ids(self) -> Iterable[Sha1Git]:
         """Get the directory identifiers from the git repository"""
         return (hashutil.hash_to_bytes(id.decode())
                 for id in self.type_to_ids[b'tree'])
 
-    def get_directories(self):
+    def get_directories(self) -> Iterable[Directory]:
         """Format the trees as swh directories"""
         missing_dirs = set(self.storage.directory_missing(
             sorted(self.get_directory_ids())))
@@ -415,15 +429,15 @@
 
             yield converters.dulwich_tree_to_directory(raw_obj, log=self.log)
 
-    def has_revisions(self):
+    def has_revisions(self) -> bool:
         return bool(self.type_to_ids[b'commit'])
 
-    def get_revision_ids(self):
+    def get_revision_ids(self) -> Iterable[Sha1Git]:
         """Get the revision identifiers from the git repository"""
         return (hashutil.hash_to_bytes(id.decode())
                 for id in self.type_to_ids[b'commit'])
 
-    def get_revisions(self):
+    def get_revisions(self) -> Iterable[Revision]:
         """Format commits as swh revisions"""
         missing_revs = set(self.storage.revision_missing(
             sorted(self.get_revision_ids())))
@@ -437,15 +451,15 @@
 
             yield converters.dulwich_commit_to_revision(raw_obj, log=self.log)
 
-    def has_releases(self):
+    def has_releases(self) -> bool:
         return bool(self.type_to_ids[b'tag'])
 
-    def get_release_ids(self):
+    def get_release_ids(self) -> Iterable[Sha1Git]:
         """Get the release identifiers from the git repository"""
         return (hashutil.hash_to_bytes(id.decode())
                 for id in self.type_to_ids[b'tag'])
 
-    def get_releases(self):
+    def get_releases(self) -> Iterable[Release]:
         """Retrieve all the release objects from the git repository"""
         missing_rels = set(self.storage.release_missing(
             sorted(self.get_release_ids())))
@@ -459,26 +473,33 @@
 
             yield converters.dulwich_tag_to_release(raw_obj, log=self.log)
 
-    def get_snapshot(self):
-        branches = {}
+    def get_snapshot(self) -> Snapshot:
+        branches: Dict[bytes, Optional[SnapshotBranch]] = {}
 
         for ref in self.remote_refs:
             ret_ref = self.local_refs[ref].copy()
             if not ret_ref['target_type']:
                 target_type = self.id_to_type[ret_ref['target']]
-                ret_ref['target_type'] = converters.DULWICH_TYPES[target_type]
+                ret_ref['target_type'] = \
+                    converters.DULWICH_TARGET_TYPES[target_type]
 
             ret_ref['target'] = hashutil.bytehex_to_hash(ret_ref['target'])
 
-            branches[ref] = ret_ref
+            branches[ref] = SnapshotBranch(
+                target_type=ret_ref['target_type'],
+                target=ret_ref['target'],
+            )
 
         for ref, target in self.symbolic_refs.items():
-            branches[ref] = {'target_type': 'alias', 'target': target}
+            branches[ref] = SnapshotBranch(
+                target_type=TargetType.ALIAS,
+                target=target,
+            )
 
-        self.snapshot = converters.branches_to_snapshot(branches)
+        self.snapshot = Snapshot(branches=branches)
         return self.snapshot
 
-    def get_fetch_history_result(self):
+    def get_fetch_history_result(self) -> Dict[str, int]:
         return {
             'contents': len(self.type_to_ids[b'blob']),
             'directories': len(self.type_to_ids[b'tree']),
@@ -486,15 +507,15 @@
             'releases': len(self.type_to_ids[b'tag']),
         }
 
-    def load_status(self):
+    def load_status(self) -> Dict[str, 
Any]: """The load was eventful if the current snapshot is different to the one we retrieved at the beginning of the run""" eventful = False if self.base_snapshot: - eventful = self.snapshot['id'] != self.base_snapshot['id'] + eventful = self.snapshot.id != self.base_snapshot.id else: - eventful = bool(self.snapshot['branches']) + eventful = bool(self.snapshot.branches) return {'status': ('eventful' if eventful else 'uneventful')} diff --git a/swh/loader/git/tests/__init__.py b/swh/loader/git/tests/__init__.py --- a/swh/loader/git/tests/__init__.py +++ b/swh/loader/git/tests/__init__.py @@ -7,9 +7,6 @@ 'storage': { 'cls': 'pipeline', 'steps': [ - { - 'cls': 'validate', - }, { 'cls': 'filter' }, diff --git a/swh/loader/git/tests/test_converters.py b/swh/loader/git/tests/test_converters.py --- a/swh/loader/git/tests/test_converters.py +++ b/swh/loader/git/tests/test_converters.py @@ -12,14 +12,19 @@ import dulwich.repo -import swh.loader.git.converters as converters from swh.model.hashutil import bytehex_to_hash, hash_to_bytes +from swh.model.model import ( + Content, Person, Release, Revision, RevisionType, ObjectType, + Timestamp, TimestampWithTimezone, +) + +import swh.loader.git.converters as converters TEST_DATA = os.path.join(os.path.dirname(__file__), 'data') -class SWHTargetType: - """Dulwich lookalike TargetType class +class SWHObjectType: + """Dulwich lookalike ObjectType class """ def __init__(self, type_name): @@ -34,7 +39,7 @@ tag_timezone, message): self.name = name self.type_name = type_name - self.object = SWHTargetType(target_type), target + self.object = SWHObjectType(target_type), target self.tagger = tagger self._message = message self.tag_time = tag_time @@ -79,28 +84,24 @@ shutil.rmtree(cls.repo_path) - def setUp(self): - super().setUp() - - self.blob_id = b'28c6f4023d65f74e3b59a2dea3c4277ed9ee07b0' - self.blob = { - 'sha1_git': bytehex_to_hash(self.blob_id), - 'sha1': hash_to_bytes('4850a3420a2262ff061cb296fb915430fa92301c'), - 'sha256': hash_to_bytes('fee7c8a485a10321ad94b64135073cb5' - '5f22cb9f57fa2417d2adfb09d310adef'), - 'blake2s256': hash_to_bytes('5d71873f42a137f6d89286e43677721e574' - '1fa05ce4cd5e3c7ea7c44d4c2d10b'), - 'data': (b'[submodule "example-dependency"]\n' - b'\tpath = example-dependency\n' - b'\turl = https://github.com/githubtraining/' - b'example-dependency.git\n'), - 'length': 124, - 'status': 'visible', - } - def test_blob_to_content(self): - content = converters.dulwich_blob_to_content(self.repo[self.blob_id]) - self.assertEqual(self.blob, content) + content_id = b'28c6f4023d65f74e3b59a2dea3c4277ed9ee07b0' + content = converters.dulwich_blob_to_content(self.repo[content_id]) + expected_content = Content( + sha1_git=bytehex_to_hash(content_id), + sha1=hash_to_bytes('4850a3420a2262ff061cb296fb915430fa92301c'), + sha256=hash_to_bytes('fee7c8a485a10321ad94b64135073cb5' + '5f22cb9f57fa2417d2adfb09d310adef'), + blake2s256=hash_to_bytes('5d71873f42a137f6d89286e43677721e574' + '1fa05ce4cd5e3c7ea7c44d4c2d10b'), + data=(b'[submodule "example-dependency"]\n' + b'\tpath = example-dependency\n' + b'\turl = https://github.com/githubtraining/' + b'example-dependency.git\n'), + length=124, + status='visible', + ) + self.assertEqual(content, expected_content) def test_convertion_wrong_input(self): class Something: @@ -115,87 +116,95 @@ } for _callable in m.values(): - self.assertIsNone(_callable(Something())) + with self.assertRaises(ValueError): + _callable(Something()) def test_commit_to_revision(self): sha1 = b'9768d0b576dbaaecd80abedad6dfd0d72f1476da' 
revision = converters.dulwich_commit_to_revision(self.repo[sha1])
-        expected_revision = {
-            'id': hash_to_bytes('9768d0b576dbaaecd80abedad6dfd0d72f1476da'),
-            'directory': b'\xf0i\\./\xa7\xce\x9dW@#\xc3A7a\xa4s\xe5\x00\xca',
-            'type': 'git',
-            'committer': {
-                'name': b'Stefano Zacchiroli',
-                'fullname': b'Stefano Zacchiroli <zack@upsilon.cc>',
-                'email': b'zack@upsilon.cc',
-            },
-            'author': {
-                'name': b'Stefano Zacchiroli',
-                'fullname': b'Stefano Zacchiroli <zack@upsilon.cc>',
-                'email': b'zack@upsilon.cc',
-            },
-            'committer_date': {
-                'negative_utc': None,
-                'timestamp': 1443083765,
-                'offset': 120,
-            },
-            'message': b'add submodule dependency\n',
-            'metadata': None,
-            'date': {
-                'negative_utc': None,
-                'timestamp': 1443083765,
-                'offset': 120,
-            },
-            'parents': [
+        expected_revision = Revision(
+            id=hash_to_bytes('9768d0b576dbaaecd80abedad6dfd0d72f1476da'),
+            directory=b'\xf0i\\./\xa7\xce\x9dW@#\xc3A7a\xa4s\xe5\x00\xca',
+            type=RevisionType.GIT,
+            committer=Person(
+                name=b'Stefano Zacchiroli',
+                fullname=b'Stefano Zacchiroli <zack@upsilon.cc>',
+                email=b'zack@upsilon.cc',
+            ),
+            author=Person(
+                name=b'Stefano Zacchiroli',
+                fullname=b'Stefano Zacchiroli <zack@upsilon.cc>',
+                email=b'zack@upsilon.cc',
+            ),
+            committer_date=TimestampWithTimezone(
+                timestamp=Timestamp(
+                    seconds=1443083765,
+                    microseconds=0,
+                ),
+                negative_utc=None,
+                offset=120,
+            ),
+            message=b'add submodule dependency\n',
+            metadata=None,
+            date=TimestampWithTimezone(
+                timestamp=Timestamp(
+                    seconds=1443083765,
+                    microseconds=0,
+                ),
+                negative_utc=None,
+                offset=120,
+            ),
+            parents=[
                 b'\xc3\xc5\x88q23`\x9f[\xbb\xb2\xd9\xe7\xf3\xfbJf\x0f?r'
             ],
-            'synthetic': False,
-        }
+            synthetic=False,
+        )
 
         self.assertEqual(revision, expected_revision)
 
     def test_author_line_to_author(self):
         # edge case out of the way
-        self.assertIsNone(converters.parse_author(None))
+        with self.assertRaises(ValueError):
+            converters.parse_author(None)
 
         tests = {
-            b'a <b@c.com>': {
-                'name': b'a',
-                'email': b'b@c.com',
-                'fullname': b'a <b@c.com>',
-            },
-            b'<foo@bar.com>': {
-                'name': None,
-                'email': b'foo@bar.com',
-                'fullname': b'<foo@bar.com>',
-            },
-            b'malformed <email': {
-                'name': b'malformed',
-                'email': b'email',
-                'fullname': b'malformed <email'
-            },
-            b'trailing <sp@c.e> ': {
-                'name': b'trailing',
-                'email': b'sp@c.e',
-                'fullname': b'trailing <sp@c.e> ',
-            },
-            b'no<sp@c.e>': {
-                'name': b'no',
-                'email': b'sp@c.e',
-                'fullname': b'no<sp@c.e>',
-            },
-            b' <>': {
-                'name': b'',
-                'email': b'',
-                'fullname': b' <>',
-            },
-            b'something': {
-                'name': None,
-                'email': None,
-                'fullname': b'something'
-            }
+            b'a <b@c.com>': Person(
+                name=b'a',
+                email=b'b@c.com',
+                fullname=b'a <b@c.com>',
+            ),
+            b'<foo@bar.com>': Person(
+                name=None,
+                email=b'foo@bar.com',
+                fullname=b'<foo@bar.com>',
+            ),
+            b'malformed <email': Person(
+                name=b'malformed',
+                email=b'email',
+                fullname=b'malformed <email'
+            ),
+            b'trailing <sp@c.e> ': Person(
+                name=b'trailing',
+                email=b'sp@c.e',
+                fullname=b'trailing <sp@c.e> ',
+            ),
+            b'no<sp@c.e>': Person(
+                name=b'no',
+                email=b'sp@c.e',
+                fullname=b'no<sp@c.e>',
+            ),
+            b' <>': Person(
+                name=b'',
+                email=b'',
+                fullname=b' <>',
+            ),
+            b'something': Person(
+                name=None,
+                email=None,
+                fullname=b'something'
+            )
         }
 
         for author in sorted(tests):
@@ -218,17 +227,17 @@
 
         actual_release = converters.dulwich_tag_to_release(tag)
 
         # then
-        expected_release = {
-            'author': None,
-            'date': None,
-            'id': b'\xda9\xa3\xee^kK\r2U\xbf\xef\x95`\x18\x90\xaf\xd8\x07\t',
-            'message': message,
-            'metadata': None,
-            'name': 'blah',
-            'synthetic': False,
-            'target': hash_to_bytes(target.decode()),
-            'target_type': 'revision'
-        }
+        expected_release = Release(
+            author=None,
+            date=None,
+            id=b'\xda9\xa3\xee^kK\r2U\xbf\xef\x95`\x18\x90\xaf\xd8\x07\t',
+            message=message,
+            metadata=None,
+            name='blah',
+            synthetic=False,
+            target=hash_to_bytes(target.decode()),
+            target_type=ObjectType.REVISION,
+        )
 
         self.assertEqual(actual_release, expected_release)
 
@@ -255,25 +264,28 @@
actual_release = converters.dulwich_tag_to_release(tag)
 
         # then
-        expected_release = {
-            'author': {
-                'email': b'hello@mail.org',
-                'fullname': b'hey dude <hello@mail.org>',
-                'name': b'hey dude'
-            },
-            'date': {
-                'negative_utc': False,
-                'offset': 0,
-                'timestamp': 1196812800.0
-            },
-            'id': b'\xda9\xa3\xee^kK\r2U\xbf\xef\x95`\x18\x90\xaf\xd8\x07\t',
-            'message': message,
-            'metadata': None,
-            'name': 'blah',
-            'synthetic': False,
-            'target': hash_to_bytes(target.decode()),
-            'target_type': 'revision'
-        }
+        expected_release = Release(
+            author=Person(
+                email=b'hello@mail.org',
+                fullname=b'hey dude <hello@mail.org>',
+                name=b'hey dude'
+            ),
+            date=TimestampWithTimezone(
+                negative_utc=False,
+                offset=0,
+                timestamp=Timestamp(
+                    seconds=1196812800,
+                    microseconds=0,
+                )
+            ),
+            id=b'\xda9\xa3\xee^kK\r2U\xbf\xef\x95`\x18\x90\xaf\xd8\x07\t',
+            message=message,
+            metadata=None,
+            name='blah',
+            synthetic=False,
+            target=hash_to_bytes(target.decode()),
+            target_type=ObjectType.REVISION,
+        )
 
         self.assertEqual(actual_release, expected_release)
 
@@ -294,20 +306,20 @@
 
         actual_release = converters.dulwich_tag_to_release(tag)
 
         # then
-        expected_release = {
-            'author': {
-                'email': b'hello@mail.org',
-                'fullname': b'hey dude <hello@mail.org>',
-                'name': b'hey dude'
-            },
-            'date': None,
-            'id': b'\xda9\xa3\xee^kK\r2U\xbf\xef\x95`\x18\x90\xaf\xd8\x07\t',
-            'message': message,
-            'metadata': None,
-            'name': 'blah',
-            'synthetic': False,
-            'target': hash_to_bytes(target.decode()),
-            'target_type': 'revision'
-        }
+        expected_release = Release(
+            author=Person(
+                email=b'hello@mail.org',
+                fullname=b'hey dude <hello@mail.org>',
+                name=b'hey dude'
+            ),
+            date=None,
+            id=b'\xda9\xa3\xee^kK\r2U\xbf\xef\x95`\x18\x90\xaf\xd8\x07\t',
+            message=message,
+            metadata=None,
+            name='blah',
+            synthetic=False,
+            target=hash_to_bytes(target.decode()),
+            target_type=ObjectType.REVISION,
+        )
 
         self.assertEqual(actual_release, expected_release)
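The max_content_size cut-off added to dulwich_blob_to_content is not covered by the tests above; a sketch of the intended behaviour, assuming an in-memory dulwich blob and arbitrary threshold values:

    from dulwich.objects import Blob

    from swh.loader.git import converters
    from swh.model.model import Content, SkippedContent

    blob = Blob.from_string(b'foo bar\n')  # 8-byte in-memory blob

    # Below the threshold, a Content is returned with the raw data attached.
    kept = converters.dulwich_blob_to_content(blob, max_content_size=100)
    assert isinstance(kept, Content) and kept.status == 'visible'

    # At or above the threshold (length >= max_content_size), a SkippedContent
    # is returned: it carries the hashes and length, but no data.
    skipped = converters.dulwich_blob_to_content(blob, max_content_size=8)
    assert isinstance(skipped, SkippedContent) and skipped.status == 'absent'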