diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py --- a/swh/storage/backfill.py +++ b/swh/storage/backfill.py @@ -205,7 +205,7 @@ compatible objects. """ - return db_to_revision(revision) + return db_to_revision(revision).to_dict() def release_converter(db, release): @@ -213,7 +213,7 @@ compatible objects. """ - return db_to_release(release) + return db_to_release(release).to_dict() def snapshot_converter(db, snapshot): diff --git a/swh/storage/converters.py b/swh/storage/converters.py --- a/swh/storage/converters.py +++ b/swh/storage/converters.py @@ -8,14 +8,20 @@ from typing import Any, Optional, Dict from swh.core.utils import encode_with_unescape -from swh.model import identifiers from swh.model.identifiers import parse_swhid from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, MetadataTargetType, + ObjectType, + Person, RawExtrinsicMetadata, + Release, + Revision, + RevisionType, + Timestamp, + TimestampWithTimezone, ) from swh.model.hashutil import MultiHash @@ -35,7 +41,7 @@ } -def author_to_db(author): +def author_to_db(author: Optional[Person]) -> Dict[str, Any]: """Convert a swh-model author to its DB representation. Args: @@ -48,12 +54,12 @@ if author is None: return DEFAULT_AUTHOR - return author + return author.to_dict() def db_to_author( fullname: Optional[bytes], name: Optional[bytes], email: Optional[bytes] -) -> Optional[Dict[str, Optional[bytes]]]: +) -> Optional[Person]: """Convert the DB representation of an author to a swh-model author. Args: @@ -62,16 +68,11 @@ email (bytes): the author's email Returns: - a dictionary with three keys (fullname, name and email), or - None if all the arguments are None. + a Person object, or None if 'fullname' is None. """ - if (fullname, name, email) == (None, None, None): + if fullname is None: return None - return { - "fullname": fullname, - "name": name, - "email": email, - } + return Person(fullname=fullname, name=name, email=email,) def db_to_git_headers(db_git_headers): @@ -81,41 +82,38 @@ return ret -def db_to_date(date, offset, neg_utc_offset): +def db_to_date( + date: Optional[datetime.datetime], offset: int, neg_utc_offset: bool +) -> Optional[TimestampWithTimezone]: """Convert the DB representation of a date to a swh-model compatible date. Args: - date (datetime.datetime): a date pulled out of the database - offset (int): an integer number of minutes representing an UTC offset - neg_utc_offset (boolean): whether an utc offset is negative + date: a date pulled out of the database + offset: an integer number of minutes representing an UTC offset + neg_utc_offset: whether an utc offset is negative Returns: - dict: a dict with three keys: - - - timestamp: a timestamp from UTC - - offset: the number of minutes since UTC - - negative_utc: whether a null UTC offset is negative + a TimestampWithTimezone, or None if the date is None. """ if date is None: return None - return { - "timestamp": { - "seconds": int(date.timestamp()), - "microseconds": date.microsecond, - }, - "offset": offset, - "negative_utc": neg_utc_offset, - } + return TimestampWithTimezone( + timestamp=Timestamp( + seconds=int(date.timestamp()), microseconds=date.microsecond, + ), + offset=offset, + negative_utc=neg_utc_offset, + ) -def date_to_db(date_offset): +def date_to_db(ts_with_tz: Optional[TimestampWithTimezone]) -> Dict[str, Any]: """Convert a swh-model date_offset to its DB representation. Args: - date_offset: a :mod:`swh.model` compatible date_offset + ts_with_tz: a TimestampWithTimezone object Returns: dict: a dictionary with three keys: @@ -127,38 +125,33 @@ """ - if date_offset is None: + if ts_with_tz is None: return DEFAULT_DATE - normalized = identifiers.normalize_timestamp(date_offset) - - ts = normalized["timestamp"] - seconds = ts.get("seconds", 0) - microseconds = ts.get("microseconds", 0) + ts = ts_with_tz.timestamp - timestamp = datetime.datetime.fromtimestamp(seconds, datetime.timezone.utc) - timestamp = timestamp.replace(microsecond=microseconds) + timestamp = datetime.datetime.fromtimestamp(ts.seconds, datetime.timezone.utc) + timestamp = timestamp.replace(microsecond=ts.microseconds) return { # PostgreSQL supports isoformatted timestamps "timestamp": timestamp.isoformat(), - "offset": normalized["offset"], - "neg_utc_offset": normalized["negative_utc"], + "offset": ts_with_tz.offset, + "neg_utc_offset": ts_with_tz.negative_utc, } -def revision_to_db(rev): +def revision_to_db(revision: Revision) -> Dict[str, Any]: """Convert a swh-model revision to its database representation. """ - revision = rev.to_dict() - author = author_to_db(revision["author"]) - date = date_to_db(revision["date"]) - committer = author_to_db(revision["committer"]) - committer_date = date_to_db(revision["committer_date"]) + author = author_to_db(revision.author) + date = date_to_db(revision.date) + committer = author_to_db(revision.committer) + committer_date = date_to_db(revision.committer_date) return { - "id": revision["id"], + "id": revision.id, "author_fullname": author["fullname"], "author_name": author["name"], "author_email": author["email"], @@ -171,22 +164,27 @@ "committer_date": committer_date["timestamp"], "committer_date_offset": committer_date["offset"], "committer_date_neg_utc_offset": committer_date["neg_utc_offset"], - "type": revision["type"], - "directory": revision["directory"], - "message": revision["message"], - "metadata": revision["metadata"], - "synthetic": revision["synthetic"], - "extra_headers": revision["extra_headers"], + "type": revision.type.value, + "directory": revision.directory, + "message": revision.message, + "metadata": None if revision.metadata is None else dict(revision.metadata), + "synthetic": revision.synthetic, + "extra_headers": revision.extra_headers, "parents": [ - {"id": revision["id"], "parent_id": parent, "parent_rank": i,} - for i, parent in enumerate(revision["parents"]) + {"id": revision.id, "parent_id": parent, "parent_rank": i,} + for i, parent in enumerate(revision.parents) ], } -def db_to_revision(db_revision: Dict[str, Any]) -> Dict[str, Any]: +def db_to_revision(db_revision: Dict[str, Any]) -> Optional[Revision]: """Convert a database representation of a revision to its swh-model representation.""" + if db_revision["type"] is None: + assert all( + v is None for (k, v) in db_revision.items() if k not in ("id", "parents") + ) + return None author = db_to_author( db_revision["author_fullname"], @@ -210,6 +208,9 @@ db_revision["committer_date_neg_utc_offset"], ) + assert author, "author is None" + assert committer, "committer is None" + parents = [] if "parents" in db_revision: for parent in db_revision["parents"]: @@ -221,56 +222,51 @@ if not extra_headers and metadata and "extra_headers" in metadata: extra_headers = db_to_git_headers(metadata.pop("extra_headers")) - ret = { - "id": db_revision["id"], - "author": author, - "date": date, - "committer": committer, - "committer_date": committer_date, - "type": db_revision["type"], - "directory": db_revision["directory"], - "message": db_revision["message"], - "metadata": metadata, - "synthetic": db_revision["synthetic"], - "extra_headers": extra_headers, - "parents": parents, - } - - if "object_id" in db_revision: - ret["object_id"] = db_revision["object_id"] - - return ret + return Revision( + id=db_revision["id"], + author=author, + date=date, + committer=committer, + committer_date=committer_date, + type=RevisionType(db_revision["type"]), + directory=db_revision["directory"], + message=db_revision["message"], + metadata=metadata, + synthetic=db_revision["synthetic"], + extra_headers=extra_headers, + parents=tuple(parents), + ) -def release_to_db(rel): +def release_to_db(release: Release) -> Dict[str, Any]: """Convert a swh-model release to its database representation. """ - - release = rel.to_dict() - - author = author_to_db(release["author"]) - date = date_to_db(release["date"]) + author = author_to_db(release.author) + date = date_to_db(release.date) return { - "id": release["id"], + "id": release.id, "author_fullname": author["fullname"], "author_name": author["name"], "author_email": author["email"], "date": date["timestamp"], "date_offset": date["offset"], "date_neg_utc_offset": date["neg_utc_offset"], - "name": release["name"], - "target": release["target"], - "target_type": release["target_type"], - "comment": release["message"], - "synthetic": release["synthetic"], + "name": release.name, + "target": release.target, + "target_type": release.target_type.value, + "comment": release.message, + "synthetic": release.synthetic, } -def db_to_release(db_release): +def db_to_release(db_release: Dict[str, Any]) -> Optional[Release]: """Convert a database representation of a release to its swh-model representation. """ + if db_release["target_type"] is None: + assert all(v is None for (k, v) in db_release.items() if k != "id") + return None author = db_to_author( db_release["author_fullname"], @@ -281,21 +277,16 @@ db_release["date"], db_release["date_offset"], db_release["date_neg_utc_offset"] ) - ret = { - "author": author, - "date": date, - "id": db_release["id"], - "name": db_release["name"], - "message": db_release["comment"], - "synthetic": db_release["synthetic"], - "target": db_release["target"], - "target_type": db_release["target_type"], - } - - if "object_id" in db_release: - ret["object_id"] = db_release["object_id"] - - return ret + return Release( + author=author, + date=date, + id=db_release["id"], + name=db_release["name"], + message=db_release["comment"], + synthetic=db_release["synthetic"], + target=db_release["target"], + target_type=ObjectType(db_release["target_type"]), + ) def db_to_raw_extrinsic_metadata(row) -> RawExtrinsicMetadata: @@ -326,6 +317,6 @@ ) -def origin_url_to_sha1(origin_url): +def origin_url_to_sha1(origin_url: str) -> bytes: """Convert an origin URL to a sha1. Encodes URL to utf-8.""" return MultiHash.from_data(origin_url.encode("utf-8"), {"sha1"}).digest()["sha1"] diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -568,13 +568,13 @@ self.journal_writer.revision_add(revisions_filtered) - revisions_filtered = list(map(converters.revision_to_db, revisions_filtered)) + db_revisions_filtered = list(map(converters.revision_to_db, revisions_filtered)) parents_filtered: List[bytes] = [] with convert_validation_exceptions(): db.copy_to( - revisions_filtered, + db_revisions_filtered, "tmp_revision", db.revision_add_cols, cur, @@ -610,10 +610,10 @@ ) -> Iterable[Optional[Dict[str, Any]]]: for line in db.revision_get_from_list(revisions, cur): data = converters.db_to_revision(dict(zip(db.revision_get_cols, line))) - if not data["type"]: + if not data: yield None continue - yield data + yield data.to_dict() @timed @db_transaction_generator(statement_timeout=2000) @@ -622,10 +622,10 @@ ) -> Iterable[Optional[Dict[str, Any]]]: for line in db.revision_log(revisions, limit, cur): data = converters.db_to_revision(dict(zip(db.revision_get_cols, line))) - if not data["type"]: + if not data: yield None continue - yield data + yield data.to_dict() @timed @db_transaction_generator(statement_timeout=2000) @@ -659,10 +659,10 @@ self.journal_writer.release_add(releases_filtered) - releases_filtered = list(map(converters.release_to_db, releases_filtered)) + db_releases_filtered = list(map(converters.release_to_db, releases_filtered)) with convert_validation_exceptions(): - db.copy_to(releases_filtered, "tmp_release", db.release_add_cols, cur) + db.copy_to(db_releases_filtered, "tmp_release", db.release_add_cols, cur) db.release_add_from_temp(cur) @@ -686,7 +686,7 @@ ) -> Iterable[Optional[Dict[str, Any]]]: for release in db.release_get_from_list(releases, cur): data = converters.db_to_release(dict(zip(db.release_get_cols, release))) - yield data if data["target_type"] else None + yield data.to_dict() if data else None @timed @db_transaction() diff --git a/swh/storage/tests/test_converters.py b/swh/storage/tests/test_converters.py --- a/swh/storage/tests/test_converters.py +++ b/swh/storage/tests/test_converters.py @@ -3,6 +3,16 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from swh.model.model import ( + ObjectType, + Person, + Release, + Revision, + RevisionType, + Timestamp, + TimestampWithTimezone, +) + from swh.storage import converters @@ -11,7 +21,11 @@ assert date_to_db(None) == {"timestamp": None, "offset": 0, "neg_utc_offset": None} assert date_to_db( - {"timestamp": 1234567890, "offset": 120, "negative_utc": False,} + TimestampWithTimezone( + timestamp=Timestamp(seconds=1234567890, microseconds=0,), + offset=120, + negative_utc=False, + ) ) == { "timestamp": "2009-02-13T23:31:30+00:00", "offset": 120, @@ -19,7 +33,11 @@ } assert date_to_db( - {"timestamp": 1123456789, "offset": 0, "negative_utc": True,} + TimestampWithTimezone( + timestamp=Timestamp(seconds=1123456789, microseconds=0,), + offset=0, + negative_utc=True, + ) ) == { "timestamp": "2005-08-07T23:19:49+00:00", "offset": 0, @@ -27,7 +45,11 @@ } assert date_to_db( - {"timestamp": 1234567890, "offset": 42, "negative_utc": False,} + TimestampWithTimezone( + timestamp=Timestamp(seconds=1234567890, microseconds=0,), + offset=42, + negative_utc=False, + ) ) == { "timestamp": "2009-02-13T23:31:30+00:00", "offset": 42, @@ -35,7 +57,11 @@ } assert date_to_db( - {"timestamp": 1634366813, "offset": -120, "negative_utc": False,} + TimestampWithTimezone( + timestamp=Timestamp(seconds=1634366813, microseconds=0,), + offset=-120, + negative_utc=False, + ) ) == { "timestamp": "2021-10-16T06:46:53+00:00", "offset": -120, @@ -48,11 +74,7 @@ actual_author = converters.db_to_author(b"fullname", b"name", b"email") # then - assert actual_author == { - "fullname": b"fullname", - "name": b"name", - "email": b"email", - } + assert actual_author == Person(fullname=b"fullname", name=b"name", email=b"email",) def test_db_to_author_none(): @@ -91,28 +113,24 @@ ) # then - assert actual_revision == { - "id": b"revision-id", - "author": { - "fullname": b"auth-fullname", - "name": b"auth-name", - "email": b"auth-email", - }, - "date": None, - "committer": { - "fullname": b"comm-fullname", - "name": b"comm-name", - "email": b"comm-email", - }, - "committer_date": None, - "type": "git", - "directory": b"dir-sha1", - "message": b"commit message", - "metadata": {}, - "synthetic": False, - "extra_headers": (), - "parents": [b"123", b"456"], - } + assert actual_revision == Revision( + id=b"revision-id", + author=Person( + fullname=b"auth-fullname", name=b"auth-name", email=b"auth-email", + ), + date=None, + committer=Person( + fullname=b"comm-fullname", name=b"comm-name", email=b"comm-email", + ), + committer_date=None, + type=RevisionType.GIT, + directory=b"dir-sha1", + message=b"commit message", + metadata={}, + synthetic=False, + extra_headers=(), + parents=(b"123", b"456"), + ) def test_db_to_release(): @@ -135,17 +153,15 @@ ) # then - assert actual_release == { - "author": { - "fullname": b"auth-fullname", - "name": b"auth-name", - "email": b"auth-email", - }, - "date": None, - "id": b"release-id", - "name": b"release-name", - "message": b"release comment", - "synthetic": True, - "target": b"revision-id", - "target_type": "revision", - } + assert actual_release == Release( + author=Person( + fullname=b"auth-fullname", name=b"auth-name", email=b"auth-email", + ), + date=None, + id=b"release-id", + name=b"release-name", + message=b"release comment", + synthetic=True, + target=b"revision-id", + target_type=ObjectType.REVISION, + )