Changeset View
Changeset View
Standalone View
Standalone View
swh/model/model.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from abc import ABCMeta, abstractmethod | from abc import ABCMeta, abstractmethod | ||||
import datetime | import datetime | ||||
from enum import Enum | from enum import Enum | ||||
from hashlib import sha256 | from hashlib import sha256 | ||||
from typing import Any, Dict, Iterable, Optional, Tuple, TypeVar, Union | from typing import Any, Dict, Iterable, Optional, Tuple, TypeVar, Union | ||||
import warnings | |||||
import attr | import attr | ||||
from attrs_strict import type_validator | from attrs_strict import type_validator | ||||
import dateutil.parser | import dateutil.parser | ||||
import iso8601 | import iso8601 | ||||
from typing_extensions import Final | from typing_extensions import Final | ||||
from .collections import ImmutableDict | from .collections import ImmutableDict | ||||
▲ Show 20 Lines • Show All 830 Lines • ▼ Show 20 Lines | class MetadataTargetType(Enum): | ||||
DIRECTORY = "directory" | DIRECTORY = "directory" | ||||
REVISION = "revision" | REVISION = "revision" | ||||
RELEASE = "release" | RELEASE = "release" | ||||
SNAPSHOT = "snapshot" | SNAPSHOT = "snapshot" | ||||
ORIGIN = "origin" | ORIGIN = "origin" | ||||
@attr.s(frozen=True) | @attr.s(frozen=True) | ||||
class RawExtrinsicMetadata(BaseModel): | class _RawExtrinsicMetadata(BaseModel): | ||||
object_type: Final = "raw_extrinsic_metadata" | |||||
# target object | # target object | ||||
type = attr.ib(type=MetadataTargetType, validator=type_validator()) | type = attr.ib(type=MetadataTargetType, validator=type_validator()) | ||||
id = attr.ib(type=Union[str, SWHID], validator=type_validator()) | target = attr.ib(type=Union[str, SWHID], validator=type_validator()) | ||||
"""URL if type=MetadataTargetType.ORIGIN, else core SWHID""" | """URL if type=MetadataTargetType.ORIGIN, else core SWHID""" | ||||
# source | # source | ||||
discovery_date = attr.ib(type=datetime.datetime, validator=type_validator()) | discovery_date = attr.ib(type=datetime.datetime, validator=type_validator()) | ||||
authority = attr.ib(type=MetadataAuthority, validator=type_validator()) | authority = attr.ib(type=MetadataAuthority, validator=type_validator()) | ||||
fetcher = attr.ib(type=MetadataFetcher, validator=type_validator()) | fetcher = attr.ib(type=MetadataFetcher, validator=type_validator()) | ||||
# the metadata itself | # the metadata itself | ||||
format = attr.ib(type=str, validator=type_validator()) | format = attr.ib(type=str, validator=type_validator()) | ||||
metadata = attr.ib(type=bytes, validator=type_validator()) | metadata = attr.ib(type=bytes, validator=type_validator()) | ||||
# context | # context | ||||
origin = attr.ib(type=Optional[str], default=None, validator=type_validator()) | origin = attr.ib(type=Optional[str], default=None, validator=type_validator()) | ||||
visit = attr.ib(type=Optional[int], default=None, validator=type_validator()) | visit = attr.ib(type=Optional[int], default=None, validator=type_validator()) | ||||
snapshot = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) | snapshot = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) | ||||
release = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) | release = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) | ||||
revision = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) | revision = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) | ||||
path = attr.ib(type=Optional[bytes], default=None, validator=type_validator()) | path = attr.ib(type=Optional[bytes], default=None, validator=type_validator()) | ||||
directory = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) | directory = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) | ||||
@target.validator
def check_target(self, attribute, value):
    """Validate ``target`` against the declared target ``type``.

    For any type other than ``MetadataTargetType.ORIGIN`` the check is
    delegated to ``self._check_swhid``. For origins, the target must not be
    a SWHID, neither as an object nor as a ``swh:``-prefixed string.

    Raises:
        ValueError: if an origin target is given as a SWHID.
    """
    if self.type != MetadataTargetType.ORIGIN:
        self._check_swhid(self.type.value, value)
        return

    # Short-circuit matters: SWHID objects are never asked for startswith().
    looks_like_swhid = isinstance(value, SWHID) or value.startswith("swh:")
    if looks_like_swhid:
        raise ValueError(
            "Got SWHID as target for origin metadata (expected an URL)."
        )
@discovery_date.validator | @discovery_date.validator | ||||
def check_discovery_date(self, attribute, value): | def check_discovery_date(self, attribute, value): | ||||
"""Checks the discovery_date has a timezone.""" | """Checks the discovery_date has a timezone.""" | ||||
if value is not None and value.tzinfo is None: | if value is not None and value.tzinfo is None: | ||||
▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines | def _check_swhid(self, expected_object_type, swhid): | ||||
f"got '{swhid.object_type}' in {swhid}" | f"got '{swhid.object_type}' in {swhid}" | ||||
) | ) | ||||
if swhid.metadata: | if swhid.metadata: | ||||
raise ValueError(f"Expected core SWHID, but got: {swhid}") | raise ValueError(f"Expected core SWHID, but got: {swhid}") | ||||
def to_dict(self):
    """Return the dictionary form of this object.

    Starts from the parent class's ``to_dict()`` result, duplicates the
    ``target`` value under the legacy ``id`` key, and removes every context
    key whose value is ``None``.
    """
    serialized = super().to_dict()

    # Legacy alias kept alongside the new name.
    serialized["id"] = serialized["target"]

    for optional_key in (
        "origin",
        "visit",
        "snapshot",
        "release",
        "revision",
        "directory",
        "path",
    ):
        if serialized[optional_key] is None:
            del serialized[optional_key]

    return serialized
@classmethod
def from_dict(cls, d):
    """Build an instance from its dictionary representation.

    ``type``, ``authority`` and ``fetcher`` are converted to their model
    classes. ``target`` is parsed as a SWHID unless the type is
    ``MetadataTargetType.ORIGIN``, and non-empty ``snapshot``, ``release``,
    ``revision`` and ``directory`` context values are parsed as SWHIDs.

    The legacy ``id`` key is still accepted as an alias of ``target``. A
    ``DeprecationWarning`` is emitted only when ``id`` is given without
    ``target``: dictionaries that carry both keys are accepted silently, so
    feeding a dictionary that has both back in does not warn.

    The input dictionary is not modified; a shallow copy is used.
    """
    d = {
        **d,
        "type": MetadataTargetType(d["type"]),
        "authority": MetadataAuthority.from_dict(d["authority"]),
        "fetcher": MetadataFetcher.from_dict(d["fetcher"]),
    }

    if "id" in d:
        if "target" not in d:
            # Only legacy-only input is deprecated; stacklevel=2 attributes
            # the warning to the code calling from_dict().
            warnings.warn(
                "RawExtrinsicMetadata `id` attribute is now called `target`",
                DeprecationWarning,
                stacklevel=2,
            )
        # Backwards-compatibility for id -> target migration
        d["target"] = d.pop("id")

    if d["type"] != MetadataTargetType.ORIGIN:
        d["target"] = parse_swhid(d["target"])

    swhid_keys = ("snapshot", "release", "revision", "directory")
    for swhid_key in swhid_keys:
        if d.get(swhid_key):
            d[swhid_key] = parse_swhid(d[swhid_key])

    return super().from_dict(d)
def unique_key(self) -> KeyType:
    """Return the key fields of this object as a dict of plain values.

    The key combines the target (type and stringified target), the
    authority (type and URL), the stringified discovery date, and the
    fetcher (name and version).
    """
    authority = self.authority
    fetcher = self.fetcher
    key = {
        "type": self.type.value,
        "target": str(self.target),
        "authority_type": authority.type.value,
        "authority_url": authority.url,
        "discovery_date": str(self.discovery_date),
        "fetcher_name": fetcher.name,
        "fetcher_version": fetcher.version,
    }
    return key
class RawExtrinsicMetadata(_RawExtrinsicMetadata):
    """``_RawExtrinsicMetadata`` plus a compatibility layer for ``id``.

    The ``id`` attribute was renamed to ``target``. This subclass still
    accepts ``id`` as a constructor keyword and exposes it as a read-only
    property; both uses emit a ``DeprecationWarning``.
    """

    object_type: Final = "raw_extrinsic_metadata"

    def __init__(self, *args, **kwargs):
        """Initialize the object, translating a legacy ``id`` keyword.

        Positional arguments are forwarded unchanged, so callers may pass
        them as they could to the parent initializer. If ``id`` is passed
        as a keyword it replaces ``target`` and a ``DeprecationWarning`` is
        emitted.
        """
        if "id" in kwargs:
            # stacklevel=2 attributes the warning to the instantiating code.
            warnings.warn(
                "RawExtrinsicMetadata `id` attribute is now called `target`",
                DeprecationWarning,
                stacklevel=2,
            )
            kwargs["target"] = kwargs.pop("id")
        super().__init__(*args, **kwargs)

    @property
    def id(self):
        """Deprecated alias of ``target``; warns on every access."""
        warnings.warn(
            "RawExtrinsicMetadata `id` attribute is now called `target`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.target