Changeset View
Changeset View
Standalone View
Standalone View
swh/model/model.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
from abc import ABCMeta, abstractmethod | |||||
from enum import Enum | from enum import Enum | ||||
from typing import List, Optional, Dict | from typing import List, Optional, Dict | ||||
import attr | import attr | ||||
import dateutil.parser | import dateutil.parser | ||||
from .identifiers import normalize_timestamp | from .identifiers import ( | ||||
from .hashutil import DEFAULT_ALGORITHMS | normalize_timestamp, directory_identifier, revision_identifier, | ||||
release_identifier, snapshot_identifier | |||||
) | |||||
from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes | |||||
SHA1_SIZE = 20 | SHA1_SIZE = 20 | ||||
# TODO: Limit this to 20 bytes | # TODO: Limit this to 20 bytes | ||||
Sha1Git = bytes | Sha1Git = bytes | ||||
class BaseModel: | class BaseModel: | ||||
Show All 23 Lines | class BaseModel: | ||||
@classmethod | @classmethod | ||||
def from_dict(cls, d): | def from_dict(cls, d): | ||||
"""Takes a dictionary representing a tree of SWH objects, and | """Takes a dictionary representing a tree of SWH objects, and | ||||
recursively builds the corresponding objects.""" | recursively builds the corresponding objects.""" | ||||
return cls(**d) | return cls(**d) | ||||
class HashableObject(metaclass=ABCMeta):
    """Mixin filling in a missing ``id`` right after attrs initialisation.

    The host class is expected to expose an ``id`` attribute and a
    ``to_dict()`` method; both are used by ``__attrs_post_init__``.
    """

    @staticmethod
    @abstractmethod
    def compute_hash(object_dict):
        """Return the identifier computed from ``object_dict``.

        Concrete model classes must override this. The returned value is
        passed through ``hash_to_bytes`` before being stored as ``id``.
        """

    def __attrs_post_init__(self):
        """Store a computed identifier when ``id`` is falsy."""
        if self.id:
            # An identifier was supplied by the caller: keep it as is.
            return
        # object.__setattr__ is used instead of a plain assignment; the
        # model classes in this file that use the mixin are declared frozen.
        computed_id = hash_to_bytes(self.compute_hash(self.to_dict()))
        object.__setattr__(self, 'id', computed_id)
@attr.s(frozen=True) | @attr.s(frozen=True) | ||||
class Person(BaseModel): | class Person(BaseModel): | ||||
"""Represents the author/committer of a revision or release.""" | """Represents the author/committer of a revision or release.""" | ||||
name = attr.ib(type=bytes) | name = attr.ib(type=bytes) | ||||
email = attr.ib(type=bytes) | email = attr.ib(type=bytes) | ||||
fullname = attr.ib(type=bytes) | fullname = attr.ib(type=bytes) | ||||
▲ Show 20 Lines • Show All 118 Lines • ▼ Show 20 Lines | class SnapshotBranch(BaseModel): | ||||
"""Represents one of the branches of a snapshot.""" | """Represents one of the branches of a snapshot.""" | ||||
target = attr.ib(type=bytes) | target = attr.ib(type=bytes) | ||||
target_type = attr.ib(type=TargetType) | target_type = attr.ib(type=TargetType) | ||||
@target.validator | @target.validator | ||||
def check_target(self, attribute, value): | def check_target(self, attribute, value): | ||||
"""Checks the target type is not an alias, checks the target is a | """Checks the target type is not an alias, checks the target is a | ||||
valid sha1_git.""" | valid sha1_git.""" | ||||
if self.target_type != TargetType.ALIAS: | if self.target_type != TargetType.ALIAS and self.target is not None: | ||||
if len(value) != 20: | if len(value) != 20: | ||||
raise ValueError('Wrong length for bytes identifier: %d' % | raise ValueError('Wrong length for bytes identifier: %d' % | ||||
len(value)) | len(value)) | ||||
@classmethod | @classmethod | ||||
def from_dict(cls, d): | def from_dict(cls, d): | ||||
return cls( | return cls( | ||||
target=d['target'], | target=d['target'], | ||||
target_type=TargetType(d['target_type'])) | target_type=TargetType(d['target_type'])) | ||||
@attr.s(frozen=True)
class Snapshot(BaseModel, HashableObject):
    """Represents the full state of an origin at a given point in time."""

    # Maps a branch name to its branch object; a branch may be None.
    branches = attr.ib(type=Dict[bytes, Optional[SnapshotBranch]])
    # Has a default, so it is declared after the mandatory attribute.
    id = attr.ib(type=Sha1Git, default=b'')

    @staticmethod
    def compute_hash(object_dict):
        """Return ``snapshot_identifier`` applied to ``object_dict``."""
        return snapshot_identifier(object_dict)

    @classmethod
    def from_dict(cls, d):
        """Build a snapshot from its dict form without modifying ``d``.

        Each non-empty value under ``'branches'`` is converted with
        ``SnapshotBranch.from_dict``; empty ones become ``None``. Every
        other key of ``d`` is forwarded to the constructor unchanged.
        """
        remaining = d.copy()
        branches = {}
        for name, branch in remaining.pop('branches').items():
            branches[name] = (
                SnapshotBranch.from_dict(branch) if branch else None)
        return cls(branches=branches, **remaining)
vlorentz: You should do this, for consistency with other classes:
```
return cls(
branches={name: ... | |||||
@attr.s(frozen=True) | @attr.s(frozen=True) | ||||
class Release(BaseModel): | class Release(BaseModel, HashableObject): | ||||
id = attr.ib(type=Sha1Git) | |||||
name = attr.ib(type=bytes) | name = attr.ib(type=bytes) | ||||
message = attr.ib(type=bytes) | message = attr.ib(type=bytes) | ||||
target = attr.ib(type=Optional[Sha1Git]) | target = attr.ib(type=Optional[Sha1Git]) | ||||
target_type = attr.ib(type=ObjectType) | target_type = attr.ib(type=ObjectType) | ||||
synthetic = attr.ib(type=bool) | synthetic = attr.ib(type=bool) | ||||
author = attr.ib(type=Optional[Person], | author = attr.ib(type=Optional[Person], | ||||
default=None) | default=None) | ||||
date = attr.ib(type=Optional[TimestampWithTimezone], | date = attr.ib(type=Optional[TimestampWithTimezone], | ||||
default=None) | default=None) | ||||
metadata = attr.ib(type=Optional[Dict[str, object]], | metadata = attr.ib(type=Optional[Dict[str, object]], | ||||
default=None) | default=None) | ||||
id = attr.ib(type=Sha1Git, default=b'') | |||||
@staticmethod | |||||
def compute_hash(object_dict): | |||||
return release_identifier(object_dict) | |||||
@author.validator | @author.validator | ||||
def check_author(self, attribute, value): | def check_author(self, attribute, value): | ||||
"""If the author is `None`, checks the date is `None` too.""" | """If the author is `None`, checks the date is `None` too.""" | ||||
if self.author is None and self.date is not None: | if self.author is None and self.date is not None: | ||||
raise ValueError('release date must be None if author is None.') | raise ValueError('release date must be None if author is None.') | ||||
def to_dict(self): | def to_dict(self): | ||||
Show All 18 Lines | class RevisionType(Enum): | ||||
GIT = 'git' | GIT = 'git' | ||||
TAR = 'tar' | TAR = 'tar' | ||||
DSC = 'dsc' | DSC = 'dsc' | ||||
SUBVERSION = 'svn' | SUBVERSION = 'svn' | ||||
MERCURIAL = 'hg' | MERCURIAL = 'hg' | ||||
@attr.s(frozen=True)
class Revision(BaseModel, HashableObject):
    """Model of a revision; ``id`` is computed when it is not provided."""

    message = attr.ib(type=bytes)
    author = attr.ib(type=Person)
    committer = attr.ib(type=Person)
    date = attr.ib(type=TimestampWithTimezone)
    committer_date = attr.ib(type=TimestampWithTimezone)
    type = attr.ib(type=RevisionType)
    directory = attr.ib(type=Sha1Git)
    synthetic = attr.ib(type=bool)
    # Attributes below carry defaults and therefore come last.
    metadata = attr.ib(
        type=Optional[Dict[str, object]],
        default=None,
    )
    parents = attr.ib(
        type=List[Sha1Git],
        default=attr.Factory(list),
    )
    id = attr.ib(type=Sha1Git, default=b'')

    @staticmethod
    def compute_hash(object_dict):
        """Return ``revision_identifier`` applied to ``object_dict``."""
        return revision_identifier(object_dict)

    @classmethod
    def from_dict(cls, d):
        """Build a revision from its dict form without modifying ``d``.

        The ``author``, ``committer``, ``date``, ``committer_date`` and
        ``type`` entries are converted to their model objects; every other
        key of ``d`` is forwarded to the constructor unchanged.
        """
        fields = d.copy()
        author = Person.from_dict(fields.pop('author'))
        committer = Person.from_dict(fields.pop('committer'))
        date = TimestampWithTimezone.from_dict(fields.pop('date'))
        committer_date = TimestampWithTimezone.from_dict(
            fields.pop('committer_date'))
        revision_type = RevisionType(fields.pop('type'))
        return cls(
            author=author,
            committer=committer,
            date=date,
            committer_date=committer_date,
            type=revision_type,
            **fields)
@attr.s(frozen=True)
class DirectoryEntry(BaseModel):
    """One named entry of a directory."""

    name = attr.ib(type=bytes)
    # The validator only accepts 'file', 'dir' or 'rev'.
    type = attr.ib(
        type=str,
        validator=attr.validators.in_(['file', 'dir', 'rev']),
    )
    target = attr.ib(type=Sha1Git)
    # Usually one of the values of `swh.model.from_disk.DentryPerms`.
    perms = attr.ib(type=int)
@attr.s(frozen=True)
class Directory(BaseModel, HashableObject):
    """A directory, made of a list of entries and an identifier.

    ``id`` may be omitted (or left empty): the ``HashableObject`` mixin then
    derives it from the object's dict representation at instantiation, as
    it does for the other hashable models in this file.
    """

    entries = attr.ib(type=List[DirectoryEntry])
    # Has a default, so it is declared after the mandatory attribute.
    id = attr.ib(type=Sha1Git, default=b'')

    @staticmethod
    def compute_hash(object_dict):
        """Return ``directory_identifier`` applied to ``object_dict``."""
        return directory_identifier(object_dict)

    @classmethod
    def from_dict(cls, d):
        """Build a directory from its dict form without modifying ``d``.

        Each item under ``'entries'`` is converted with
        ``DirectoryEntry.from_dict``; every other key of ``d`` (such as
        ``'id'``) is forwarded to the constructor unchanged.
        """
        d = d.copy()
        return cls(
            entries=[DirectoryEntry.from_dict(entry)
                     for entry in d.pop('entries')],
            **d)
Not Done — Inline Actions — vlorentz: same
@attr.s(frozen=True) | @attr.s(frozen=True) | ||||
class Content(BaseModel): | class Content(BaseModel): | ||||
sha1 = attr.ib(type=bytes) | sha1 = attr.ib(type=bytes) | ||||
sha1_git = attr.ib(type=Sha1Git) | sha1_git = attr.ib(type=Sha1Git) | ||||
sha256 = attr.ib(type=bytes) | sha256 = attr.ib(type=bytes) | ||||
blake2s256 = attr.ib(type=bytes) | blake2s256 = attr.ib(type=bytes) | ||||
▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines |
You should do this, for consistency with other classes: