Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/cassandra/converters.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import json | import json | ||||
import attr | import attr | ||||
from copy import deepcopy | from copy import deepcopy | ||||
from typing import Any, Dict, Tuple | from typing import Dict, Tuple | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
ObjectType, | ObjectType, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Release, | Release, | ||||
Sha1Git, | Sha1Git, | ||||
) | ) | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS | from swh.model.hashutil import DEFAULT_ALGORITHMS | ||||
from .common import Row | from .common import remove_keys | ||||
from .model import OriginVisitRow, OriginVisitStatusRow, RevisionRow, ReleaseRow | |||||
def revision_to_db(revision: Revision) -> Dict[str, Any]: | def revision_to_db(revision: Revision) -> RevisionRow: | ||||
# we use a deepcopy of the dict because we do not want to recurse the | # we use a deepcopy of the dict because we do not want to recurse the | ||||
# Model->dict conversion (to keep Timestamp & al. entities), BUT we do not | # Model->dict conversion (to keep Timestamp & al. entities), BUT we do not | ||||
# want to modify original metadata (embedded in the Model entity), so we | # want to modify original metadata (embedded in the Model entity), so we | ||||
# non-recursively convert it as a dict but make a deep copy. | # non-recursively convert it as a dict but make a deep copy. | ||||
db_revision = deepcopy(attr.asdict(revision, recurse=False)) | db_revision = deepcopy(attr.asdict(revision, recurse=False)) | ||||
metadata = revision.metadata | metadata = revision.metadata | ||||
extra_headers = revision.extra_headers | extra_headers = revision.extra_headers | ||||
if not extra_headers and metadata and "extra_headers" in metadata: | if not extra_headers and metadata and "extra_headers" in metadata: | ||||
extra_headers = db_revision["metadata"].pop("extra_headers") | extra_headers = db_revision["metadata"].pop("extra_headers") | ||||
db_revision["metadata"] = json.dumps( | db_revision["metadata"] = json.dumps( | ||||
dict(db_revision["metadata"]) if db_revision["metadata"] is not None else None | dict(db_revision["metadata"]) if db_revision["metadata"] is not None else None | ||||
) | ) | ||||
db_revision["extra_headers"] = extra_headers | db_revision["extra_headers"] = extra_headers | ||||
db_revision["type"] = db_revision["type"].value | db_revision["type"] = db_revision["type"].value | ||||
return db_revision | return RevisionRow(**remove_keys(db_revision, ("parents",))) | ||||
def revision_from_db(db_revision: Row, parents: Tuple[Sha1Git, ...]) -> Revision: | def revision_from_db( | ||||
revision = db_revision._asdict() # type: ignore | db_revision: RevisionRow, parents: Tuple[Sha1Git, ...] | ||||
) -> Revision: | |||||
revision = db_revision.to_dict() | |||||
metadata = json.loads(revision.pop("metadata", None)) | metadata = json.loads(revision.pop("metadata", None)) | ||||
extra_headers = revision.pop("extra_headers", ()) | extra_headers = revision.pop("extra_headers", ()) | ||||
if not extra_headers and metadata and "extra_headers" in metadata: | if not extra_headers and metadata and "extra_headers" in metadata: | ||||
extra_headers = metadata.pop("extra_headers") | extra_headers = metadata.pop("extra_headers") | ||||
if extra_headers is None: | if extra_headers is None: | ||||
extra_headers = () | extra_headers = () | ||||
return Revision( | return Revision( | ||||
parents=parents, | parents=parents, | ||||
type=RevisionType(revision.pop("type")), | type=RevisionType(revision.pop("type")), | ||||
metadata=metadata, | metadata=metadata, | ||||
extra_headers=extra_headers, | extra_headers=extra_headers, | ||||
**revision, | **revision, | ||||
) | ) | ||||
def release_to_db(release: Release) -> Dict[str, Any]: | def release_to_db(release: Release) -> ReleaseRow: | ||||
db_release = attr.asdict(release, recurse=False) | db_release = attr.asdict(release, recurse=False) | ||||
db_release["target_type"] = db_release["target_type"].value | db_release["target_type"] = db_release["target_type"].value | ||||
return db_release | return ReleaseRow(**remove_keys(db_release, ("metadata",))) | ||||
def release_from_db(db_release: Row) -> Release: | def release_from_db(db_release: ReleaseRow) -> Release: | ||||
release = db_release._asdict() # type: ignore | release = db_release.to_dict() | ||||
return Release(target_type=ObjectType(release.pop("target_type")), **release,) | return Release(target_type=ObjectType(release.pop("target_type")), **release,) | ||||
def row_to_content_hashes(row: Row) -> Dict[str, bytes]: | def row_to_content_hashes(row: ReleaseRow) -> Dict[str, bytes]: | ||||
"""Convert cassandra row to a content hashes | """Convert cassandra row to a content hashes | ||||
""" | """ | ||||
hashes = {} | hashes = {} | ||||
for algo in DEFAULT_ALGORITHMS: | for algo in DEFAULT_ALGORITHMS: | ||||
hashes[algo] = getattr(row, algo) | hashes[algo] = getattr(row, algo) | ||||
return hashes | return hashes | ||||
def row_to_visit(row) -> OriginVisit: | def row_to_visit(row: OriginVisitRow) -> OriginVisit: | ||||
"""Format a row representing an origin_visit to an actual OriginVisit. | """Format a row representing an origin_visit to an actual OriginVisit. | ||||
""" | """ | ||||
return OriginVisit( | return OriginVisit( | ||||
origin=row.origin, | origin=row.origin, | ||||
visit=row.visit, | visit=row.visit, | ||||
date=row.date.replace(tzinfo=datetime.timezone.utc), | date=row.date.replace(tzinfo=datetime.timezone.utc), | ||||
type=row.type, | type=row.type, | ||||
) | ) | ||||
def row_to_visit_status(row) -> OriginVisitStatus: | def row_to_visit_status(row: OriginVisitStatusRow) -> OriginVisitStatus: | ||||
"""Format a row representing a visit_status to an actual OriginVisitStatus. | """Format a row representing a visit_status to an actual OriginVisitStatus. | ||||
""" | """ | ||||
return OriginVisitStatus.from_dict( | return OriginVisitStatus.from_dict( | ||||
{ | { | ||||
**row._asdict(), | **row.to_dict(), | ||||
"origin": row.origin, | |||||
"date": row.date.replace(tzinfo=datetime.timezone.utc), | "date": row.date.replace(tzinfo=datetime.timezone.utc), | ||||
"metadata": (json.loads(row.metadata) if row.metadata else None), | "metadata": (json.loads(row.metadata) if row.metadata else None), | ||||
} | } | ||||
) | ) | ||||
def visit_status_to_row(status: OriginVisitStatus) -> OriginVisitStatusRow: | |||||
d = status.to_dict() | |||||
return OriginVisitStatusRow.from_dict({**d, "metadata": json.dumps(d["metadata"])}) |