Changeset View
Changeset View
Standalone View
Standalone View
swh/model/model.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
from abc import ABCMeta, abstractmethod | from abc import ABCMeta, abstractmethod | ||||
from enum import Enum | from enum import Enum | ||||
from hashlib import sha256 | |||||
from typing import Dict, List, Optional, Union | from typing import Dict, List, Optional, Union | ||||
import attr | import attr | ||||
from attrs_strict import type_validator | from attrs_strict import type_validator | ||||
import dateutil.parser | import dateutil.parser | ||||
import iso8601 | import iso8601 | ||||
from .identifiers import ( | from .identifiers import ( | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | def to_dict(self): | ||||
return dictify(attr.asdict(self, recurse=False)) | return dictify(attr.asdict(self, recurse=False)) | ||||
@classmethod | @classmethod | ||||
def from_dict(cls, d): | def from_dict(cls, d): | ||||
"""Takes a dictionary representing a tree of SWH objects, and | """Takes a dictionary representing a tree of SWH objects, and | ||||
recursively builds the corresponding objects.""" | recursively builds the corresponding objects.""" | ||||
return cls(**d) | return cls(**d) | ||||
def anonymize(self) -> Optional["BaseModel"]: | |||||
"""Returns an anonymized version of the object, if needed. | |||||
If the object model does not need/support anonymization, returns None. | |||||
This default implementation always returns None. | |||||
""" | |||||
return None | |||||
class HashableObject(metaclass=ABCMeta): | class HashableObject(metaclass=ABCMeta): | ||||
"""Mixin to automatically compute object identifier hash when | """Mixin to automatically compute object identifier hash when | ||||
the associated model is instantiated.""" | the associated model is instantiated.""" | ||||
@staticmethod | @staticmethod | ||||
@abstractmethod | @abstractmethod | ||||
def compute_hash(object_dict): | def compute_hash(object_dict): | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | def from_fullname(cls, fullname: bytes): | ||||
close_bracket = raw_email.rindex(b">") | close_bracket = raw_email.rindex(b">") | ||||
except ValueError: | except ValueError: | ||||
email = raw_email | email = raw_email | ||||
else: | else: | ||||
email = raw_email[:close_bracket] | email = raw_email[:close_bracket] | ||||
return Person(name=name or None, email=email or None, fullname=fullname,) | return Person(name=name or None, email=email or None, fullname=fullname,) | ||||
def anonymize(self) -> "Person": | |||||
"""Returns an anonymized version of the Person object. | |||||
Anonymization yields a Person whose fullname is the SHA-256 digest of the | |||||
original fullname, with name and email unset. | |||||
""" | |||||
return Person(fullname=sha256(self.fullname).digest(), name=None, email=None,) | |||||
olasd: `name` and `email` are just display helpers. The anonymous version should probably only hash the fullname data. | |||||
Done · Inline Actions · douardda: right, I read the code in identifiers.py too fast and thought all 3 were concatenated for the hash computation, but they are not, as you point out. | |||||
@attr.s(frozen=True) | @attr.s(frozen=True) | ||||
class Timestamp(BaseModel): | class Timestamp(BaseModel): | ||||
"""Represents a naive timestamp from a VCS.""" | """Represents a naive timestamp from a VCS.""" | ||||
seconds = attr.ib(type=int, validator=type_validator()) | seconds = attr.ib(type=int, validator=type_validator()) | ||||
microseconds = attr.ib(type=int, validator=type_validator()) | microseconds = attr.ib(type=int, validator=type_validator()) | ||||
▲ Show 20 Lines • Show All 224 Lines • ▼ Show 20 Lines | class Release(BaseModel, HashableObject): | ||||
def from_dict(cls, d): | def from_dict(cls, d): | ||||
d = d.copy() | d = d.copy() | ||||
if d.get("author"): | if d.get("author"): | ||||
d["author"] = Person.from_dict(d["author"]) | d["author"] = Person.from_dict(d["author"]) | ||||
if d.get("date"): | if d.get("date"): | ||||
d["date"] = TimestampWithTimezone.from_dict(d["date"]) | d["date"] = TimestampWithTimezone.from_dict(d["date"]) | ||||
return cls(target_type=ObjectType(d.pop("target_type")), **d) | return cls(target_type=ObjectType(d.pop("target_type")), **d) | ||||
def anonymize(self) -> "Release": | |||||
"""Returns an anonymized version of the Release object. | |||||
Not Done · Inline Actions · ardumont: consists in | |||||
Anonymization consists in replacing the author with an anonymized Person object. | |||||
""" | |||||
author = self.author and self.author.anonymize() | |||||
return attr.evolve(self, author=author) | |||||
class RevisionType(Enum): | class RevisionType(Enum): | ||||
GIT = "git" | GIT = "git" | ||||
TAR = "tar" | TAR = "tar" | ||||
DSC = "dsc" | DSC = "dsc" | ||||
SUBVERSION = "svn" | SUBVERSION = "svn" | ||||
MERCURIAL = "hg" | MERCURIAL = "hg" | ||||
Show All 37 Lines | def from_dict(cls, d): | ||||
author=Person.from_dict(d.pop("author")), | author=Person.from_dict(d.pop("author")), | ||||
committer=Person.from_dict(d.pop("committer")), | committer=Person.from_dict(d.pop("committer")), | ||||
date=date, | date=date, | ||||
committer_date=committer_date, | committer_date=committer_date, | ||||
type=RevisionType(d.pop("type")), | type=RevisionType(d.pop("type")), | ||||
**d, | **d, | ||||
) | ) | ||||
def anonymize(self) -> "Revision": | |||||
"""Returns an anonymized version of the Revision object. | |||||
Not Done · Inline Actions · ardumont: consists in | |||||
Anonymization consists in replacing the author and committer with an anonymized | |||||
Person object. | |||||
""" | |||||
return attr.evolve( | |||||
self, author=self.author.anonymize(), committer=self.committer.anonymize() | |||||
) | |||||
@attr.s(frozen=True) | @attr.s(frozen=True) | ||||
class DirectoryEntry(BaseModel): | class DirectoryEntry(BaseModel): | ||||
name = attr.ib(type=bytes, validator=type_validator()) | name = attr.ib(type=bytes, validator=type_validator()) | ||||
type = attr.ib(type=str, validator=attr.validators.in_(["file", "dir", "rev"])) | type = attr.ib(type=str, validator=attr.validators.in_(["file", "dir", "rev"])) | ||||
target = attr.ib(type=Sha1Git, validator=type_validator()) | target = attr.ib(type=Sha1Git, validator=type_validator()) | ||||
perms = attr.ib(type=int, validator=type_validator()) | perms = attr.ib(type=int, validator=type_validator()) | ||||
"""Usually one of the values of `swh.model.from_disk.DentryPerms`.""" | """Usually one of the values of `swh.model.from_disk.DentryPerms`.""" | ||||
▲ Show 20 Lines • Show All 180 Lines • Show Last 20 Lines |
name and email are just display helpers. The anonymous version should probably only hash the fullname data.