diff --git a/swh/web/common/converters.py b/swh/web/common/converters.py --- a/swh/web/common/converters.py +++ b/swh/web/common/converters.py @@ -6,11 +6,11 @@ import datetime import json -from typing import Any, Dict +from typing import Any, Dict, Union from swh.core.utils import decode_with_escape from swh.model import hashutil -from swh.model.model import Release +from swh.model.model import Release, Revision from swh.storage.interface import PartialBranches from swh.web.common.typing import OriginInfo, OriginVisitInfo @@ -258,26 +258,11 @@ return json.loads(json.dumps(metadata, cls=SWHMetadataEncoder)) -def from_revision(revision): - """Convert from a swh revision to a json serializable revision dictionary. +def from_revision(revision: Union[Dict[str, Any], Revision]) -> Dict[str, Any]: + """Convert swh revision model object to a json serializable revision dictionary. Args: - revision (dict): dict with keys: - - - id: identifier of the revision (sha1 in bytes) - - directory: identifier of the directory the revision points to - (sha1 in bytes) - - author_name, author_email: author's revision name and email - - committer_name, committer_email: committer's revision name and - email - - message: revision's message - - date, date_offset: revision's author date - - committer_date, committer_date_offset: revision's commit date - - parents: list of parents for such revision - - synthetic: revision's property nature - - type: revision's type (git, tar or dsc at the moment) - - metadata: if the revision is synthetic, this can reference - dynamic properties. + revision: revision model object Returns: dict: Revision dictionary with the same keys as inputs, except: @@ -289,26 +274,30 @@ Remaining keys are left as is """ - revision = from_swh( - revision, + if isinstance(revision, Revision): + revision_d = revision.to_dict() + else: + revision_d = revision + revision_d = from_swh( + revision_d, hashess={"id", "directory", "parents", "children"}, - bytess={"name", "fullname", "email"}, + bytess={"name", "fullname", "email", "extra_headers"}, convert={"metadata"}, convert_fn=convert_revision_metadata, dates={"date", "committer_date"}, ) - if revision: - if "parents" in revision: - revision["merge"] = len(revision["parents"]) > 1 - if "message" in revision: + if revision_d: + if "parents" in revision_d: + revision_d["merge"] = len(revision_d["parents"]) > 1 + if "message" in revision_d: try: - revision["message"] = revision["message"].decode("utf-8") + revision_d["message"] = revision_d["message"].decode("utf-8") except UnicodeDecodeError: - revision["message_decoding_failed"] = True - revision["message"] = None + revision_d["message_decoding_failed"] = True + revision_d["message"] = None - return revision + return revision_d def from_content(content): diff --git a/swh/web/common/service.py b/swh/web/common/service.py --- a/swh/web/common/service.py +++ b/swh/web/common/service.py @@ -8,11 +8,11 @@ import re from collections import defaultdict -from typing import Any, Dict, List, Set, Iterable, Iterator, Optional, Tuple +from typing import Any, Dict, List, Set, Iterable, Iterator, Optional, Union, Tuple from swh.model import hashutil from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT -from swh.model.model import OriginVisit +from swh.model.model import OriginVisit, Revision from swh.storage.algos import diff, revisions_walker from swh.storage.algos.origin import origin_get_latest_visit_status from swh.storage.algos.snapshot import snapshot_get_latest @@ -489,7 +489,7 @@ yield None -def lookup_revision(rev_sha1_git): +def lookup_revision(rev_sha1_git) -> Dict[str, Any]: """Return information about the revision with sha1 revision_sha1_git. Args: @@ -504,32 +504,36 @@ """ sha1_git_bin = _to_sha1_bin(rev_sha1_git) - revision = _first_element(storage.revision_get([sha1_git_bin])) + revision = storage.revision_get([sha1_git_bin])[0] if not revision: - raise NotFoundExc("Revision with sha1_git %s not found." % rev_sha1_git) + raise NotFoundExc(f"Revision with sha1_git {rev_sha1_git} not found.") return converters.from_revision(revision) -def lookup_revision_multiple(sha1_git_list): +def lookup_revision_multiple(sha1_git_list) -> Iterator[Optional[Dict[str, Any]]]: """Return information about the revisions identified with their sha1_git identifiers. Args: sha1_git_list: A list of revision sha1_git identifiers - Returns: - Iterator of revisions information as dict. + Yields: + revision information as dict if the revision exists, None otherwise. Raises: ValueError if the identifier provided is not of sha1 nature. """ sha1_bin_list = [_to_sha1_bin(sha1_git) for sha1_git in sha1_git_list] - revisions = storage.revision_get(sha1_bin_list) or [] - return (converters.from_revision(r) for r in revisions) + revisions = storage.revision_get(sha1_bin_list) + for revision in revisions: + if revision is not None: + yield converters.from_revision(revision) + else: + yield None -def lookup_revision_message(rev_sha1_git): +def lookup_revision_message(rev_sha1_git) -> Dict[str, bytes]: """Return the raw message of the revision with sha1 revision_sha1_git. Args: @@ -544,14 +548,12 @@ """ sha1_git_bin = _to_sha1_bin(rev_sha1_git) - - revision = _first_element(storage.revision_get([sha1_git_bin])) + revision = storage.revision_get([sha1_git_bin])[0] if not revision: - raise NotFoundExc("Revision with sha1_git %s not found." % rev_sha1_git) - if "message" not in revision: - raise NotFoundExc("No message for revision with sha1_git %s." % rev_sha1_git) - res = {"message": revision["message"]} - return res + raise NotFoundExc(f"Revision with sha1_git {rev_sha1_git} not found.") + if not revision.message: + raise NotFoundExc(f"No message for revision with sha1_git {rev_sha1_git}.") + return {"message": revision.message} def _lookup_revision_id_by(origin, branch_name, timestamp): @@ -689,15 +691,16 @@ rev_root_id_bin = hashutil.hash_to_bytes(rev_root_id) - rev_root = _first_element(storage.revision_get([rev_root_id_bin])) - + rev_root = storage.revision_get([rev_root_id_bin])[0] return ( - converters.from_revision(rev_root), + converters.from_revision(rev_root) if rev_root else None, lookup_revision_with_context(rev_root, sha1_git, limit), ) -def lookup_revision_with_context(sha1_git_root, sha1_git, limit=100): +def lookup_revision_with_context( + sha1_git_root: Union[str, Dict[str, Any], Revision], sha1_git: str, limit: int = 100 +) -> Dict[str, Any]: """Return information about revision sha1_git, limited to the sub-graph of all transitive parents of sha1_git_root. @@ -721,22 +724,24 @@ """ sha1_git_bin = _to_sha1_bin(sha1_git) - revision = _first_element(storage.revision_get([sha1_git_bin])) + revision = storage.revision_get([sha1_git_bin])[0] if not revision: - raise NotFoundExc("Revision %s not found" % sha1_git) + raise NotFoundExc(f"Revision {sha1_git} not found") if isinstance(sha1_git_root, str): sha1_git_root_bin = _to_sha1_bin(sha1_git_root) - revision_root = _first_element(storage.revision_get([sha1_git_root_bin])) + revision_root = storage.revision_get([sha1_git_root_bin])[0] if not revision_root: - raise NotFoundExc("Revision root %s not found" % sha1_git_root) + raise NotFoundExc(f"Revision root {sha1_git_root} not found") + elif isinstance(sha1_git_root, Revision): + sha1_git_root_bin = sha1_git_root.id else: sha1_git_root_bin = sha1_git_root["id"] revision_log = storage.revision_log([sha1_git_root_bin], limit) - parents = {} + parents: Dict[str, List[str]] = {} children = defaultdict(list) for rev in revision_log: @@ -746,14 +751,12 @@ parents[rev_id].append(parent_id) children[parent_id].append(rev_id) - if revision["id"] not in parents: - raise NotFoundExc( - "Revision %s is not an ancestor of %s" % (sha1_git, sha1_git_root) - ) + if revision.id not in parents: + raise NotFoundExc(f"Revision {sha1_git} is not an ancestor of {sha1_git_root}") - revision["children"] = children[revision["id"]] - - return converters.from_revision(revision) + revision_d = revision.to_dict() + revision_d["children"] = children[revision.id] + return converters.from_revision(revision_d) def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False): @@ -779,10 +782,10 @@ """ sha1_git_bin = _to_sha1_bin(sha1_git) - revision = _first_element(storage.revision_get([sha1_git_bin])) + revision = storage.revision_get([sha1_git_bin])[0] if not revision: - raise NotFoundExc("Revision %s not found" % sha1_git) - dir_sha1_git_bin = revision["directory"] + raise NotFoundExc(f"Revision {sha1_git} not found") + dir_sha1_git_bin = revision.directory if dir_path: paths = dir_path.strip(os.path.sep).split(os.path.sep) entity = storage.directory_entry_get_by_path( @@ -819,12 +822,12 @@ "content": converters.from_content(content_d), } elif entity["type"] == "rev": # revision - revision = next(storage.revision_get([entity["target"]])) + revision = storage.revision_get([entity["target"]])[0] return { "type": "rev", "path": "." if not dir_path else dir_path, "revision": sha1_git, - "content": converters.from_revision(revision), + "content": converters.from_revision(revision) if revision else None, } else: raise NotImplementedError("Entity of type %s not implemented." % entity["type"]) diff --git a/swh/web/tests/common/test_converters.py b/swh/web/tests/common/test_converters.py --- a/swh/web/tests/common/test_converters.py +++ b/swh/web/tests/common/test_converters.py @@ -10,6 +10,8 @@ ObjectType, Person, Release, + Revision, + RevisionType, TimestampWithTimezone, Timestamp, ) @@ -253,7 +255,107 @@ assert actual_release == expected_release +def test_from_revision_model_object(): + ts = int( + datetime.datetime( + 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc + ).timestamp() + ) + revision_input = Revision( + directory=hashutil.hash_to_bytes("7834ef7e7c357ce2af928115c6c6a42b7e2a44e6"), + author=Person( + name=b"Software Heritage", + fullname=b"robot robot@softwareheritage.org", + email=b"robot@softwareheritage.org", + ), + committer=Person( + name=b"Software Heritage", + fullname=b"robot robot@softwareheritage.org", + email=b"robot@softwareheritage.org", + ), + message=b"synthetic revision message", + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=ts, microseconds=0), + offset=0, + negative_utc=False, + ), + committer_date=TimestampWithTimezone( + timestamp=Timestamp(seconds=ts, microseconds=0), + offset=0, + negative_utc=False, + ), + synthetic=True, + type=RevisionType.TAR, + parents=tuple( + [ + hashutil.hash_to_bytes("29d8be353ed3480476f032475e7c244eff7371d5"), + hashutil.hash_to_bytes("30d8be353ed3480476f032475e7c244eff7371d5"), + ] + ), + extra_headers=((b"gpgsig", b"some-signature"),), + metadata={ + "original_artifact": [ + { + "archive_type": "tar", + "name": "webbase-5.7.0.tar.gz", + "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", + "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", + "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" + "309d36484e7edf7bb912", + } + ], + }, + ) + + expected_revision = { + "id": "a001358278a0d811fe7072463f805da601121c2a", + "directory": "7834ef7e7c357ce2af928115c6c6a42b7e2a44e6", + "author": { + "name": "Software Heritage", + "fullname": "robot robot@softwareheritage.org", + "email": "robot@softwareheritage.org", + }, + "committer": { + "name": "Software Heritage", + "fullname": "robot robot@softwareheritage.org", + "email": "robot@softwareheritage.org", + }, + "message": "synthetic revision message", + "date": "2000-01-17T11:23:54+00:00", + "committer_date": "2000-01-17T11:23:54+00:00", + "parents": tuple( + [ + "29d8be353ed3480476f032475e7c244eff7371d5", + "30d8be353ed3480476f032475e7c244eff7371d5", + ] + ), + "type": "tar", + "synthetic": True, + "extra_headers": (("gpgsig", "some-signature"),), + "metadata": { + "original_artifact": [ + { + "archive_type": "tar", + "name": "webbase-5.7.0.tar.gz", + "sha1": "147f73f369733d088b7a6fa9c4e0273dcd3c7ccd", + "sha1_git": "6a15ea8b881069adedf11feceec35588f2cfe8f1", + "sha256": "401d0df797110bea805d358b85bcc1ced29549d3d73f" + "309d36484e7edf7bb912", + } + ], + }, + "merge": True, + } + + actual_revision = converters.from_revision(revision_input) + + assert actual_revision == expected_revision + + def test_from_revision(): + ts = datetime.datetime( + 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc + ).timestamp() revision_input = { "id": hashutil.hash_to_bytes("18d8be353ed3480476f032475e7c233eff7371d5"), "directory": hashutil.hash_to_bytes("7834ef7e7c357ce2af928115c6c6a42b7e2a44e6"), @@ -268,20 +370,8 @@ "email": b"robot@softwareheritage.org", }, "message": b"synthetic revision message", - "date": { - "timestamp": datetime.datetime( - 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc - ).timestamp(), - "offset": 0, - "negative_utc": False, - }, - "committer_date": { - "timestamp": datetime.datetime( - 2000, 1, 17, 11, 23, 54, tzinfo=datetime.timezone.utc - ).timestamp(), - "offset": 0, - "negative_utc": False, - }, + "date": {"timestamp": ts, "offset": 0, "negative_utc": False,}, + "committer_date": {"timestamp": ts, "offset": 0, "negative_utc": False,}, "synthetic": True, "type": "tar", "parents": [ diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py --- a/swh/web/tests/conftest.py +++ b/swh/web/tests/conftest.py @@ -210,10 +210,10 @@ rel_data = self.storage.release_get([rel_id_bytes])[0] return converters.from_release(rel_data) if rel_data else None - def revision_get(self, rev_id): + def revision_get(self, rev_id: str) -> Optional[Dict[str, Any]]: rev_id_bytes = hash_to_bytes(rev_id) - rev_data = next(self.storage.revision_get([rev_id_bytes])) - return converters.from_revision(rev_data) + rev_data = self.storage.revision_get([rev_id_bytes])[0] + return converters.from_revision(rev_data) if rev_data else None def revision_log(self, rev_id, limit=None): rev_id_bytes = hash_to_bytes(rev_id) diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py --- a/swh/web/tests/data.py +++ b/swh/web/tests/data.py @@ -236,7 +236,9 @@ revisions.add(rev_id) for rev in storage.revision_get(revisions): - dir_id = rev["directory"] + if rev is None: + continue + dir_id = rev.directory directories.add(hash_to_hex(dir_id)) for entry in dir_iterator(storage, dir_id): if entry["type"] == "file": diff --git a/swh/web/tests/strategies.py b/swh/web/tests/strategies.py --- a/swh/web/tests/strategies.py +++ b/swh/web/tests/strategies.py @@ -368,7 +368,7 @@ into the test archive. """ return sha1().filter( - lambda s: next(get_tests_data()["storage"].revision_get([hash_to_bytes(s)])) + lambda s: get_tests_data()["storage"].revision_get([hash_to_bytes(s)])[0] is None )