Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163761
D3171.id11270.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D3171.id11270.diff
View Options
diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py
--- a/swh/model/hypothesis_strategies.py
+++ b/swh/model/hypothesis_strategies.py
@@ -93,10 +93,13 @@
return "%s://%s" % (protocol, domain)
-def persons_d():
- return builds(
- dict, fullname=binary(), email=optional(binary()), name=optional(binary()),
- )
+@composite
+def persons_d(draw):
+ fullname = draw(binary())
+ email = draw(optional(binary()))
+ name = draw(optional(binary()))
+ assume(not (len(fullname) == 32 and email is None and name is None))
+ return dict(fullname=fullname, name=name, email=email)
def persons():
diff --git a/swh/model/model.py b/swh/model/model.py
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -7,7 +7,8 @@
from abc import ABCMeta, abstractmethod
from enum import Enum
-from typing import Dict, List, Optional, Union
+from hashlib import sha256
+from typing import Dict, List, Optional, TypeVar, Union
import attr
from attrs_strict import type_validator
@@ -51,6 +52,9 @@
return value
+ModelType = TypeVar("ModelType", bound="BaseModel")
+
+
class BaseModel:
"""Base class for SWH model classes.
@@ -68,6 +72,13 @@
recursively builds the corresponding objects."""
return cls(**d)
+ def anonymize(self: ModelType) -> Optional[ModelType]:
+ """Returns an anonymized version of the object, if needed.
+
+ If the object model does not need/support anonymization, returns None.
+ """
+ return None
+
class HashableObject(metaclass=ABCMeta):
"""Mixin to automatically compute object identifier hash when
@@ -129,6 +140,14 @@
return Person(name=name or None, email=email or None, fullname=fullname,)
+ def anonymize(self) -> "Person":
+ """Returns an anonymized version of the Person object.
+
+ Anonymization yields a Person whose fullname is the SHA-256 digest of the
+ original fullname, with name and email unset.
+ """
+ return Person(fullname=sha256(self.fullname).digest(), name=None, email=None,)
+
@attr.s(frozen=True)
class Timestamp(BaseModel):
@@ -369,6 +388,14 @@
d["date"] = TimestampWithTimezone.from_dict(d["date"])
return cls(target_type=ObjectType(d.pop("target_type")), **d)
+ def anonymize(self) -> "Release":
+ """Returns an anonymized version of the Release object.
+
+ Anonymization consists in replacing the author with an anonymized Person object.
+ """
+ author = self.author and self.author.anonymize()
+ return attr.evolve(self, author=author)
+
class RevisionType(Enum):
GIT = "git"
@@ -422,6 +449,16 @@
**d,
)
+ def anonymize(self) -> "Revision":
+ """Returns an anonymized version of the Revision object.
+
+ Anonymization consists in replacing the author and committer with an anonymized
+ Person object.
+ """
+ return attr.evolve(
+ self, author=self.author.anonymize(), committer=self.committer.anonymize()
+ )
+
@attr.s(frozen=True)
class DirectoryEntry(BaseModel):
diff --git a/swh/model/tests/test_hypothesis_strategies.py b/swh/model/tests/test_hypothesis_strategies.py
--- a/swh/model/tests/test_hypothesis_strategies.py
+++ b/swh/model/tests/test_hypothesis_strategies.py
@@ -18,6 +18,7 @@
skipped_contents,
snapshots,
origin_visits,
+ persons,
)
from swh.model.model import TargetType
@@ -196,3 +197,10 @@
@given(origin_visits())
def test_origin_visit_aware_datetime(visit):
assert visit.date.tzinfo is not None
+
+
+@given(persons())
+def test_person_do_not_look_like_anonimized(person):
+ assert not (
+ len(person.fullname) == 32 and person.name is None and person.email is None
+ )
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -61,6 +61,37 @@
assert obj_as_dict == type(obj).from_dict(obj_as_dict).to_dict()
+# Anonymization
+
+
+@given(strategies.objects())
+def test_anonymization(objtype_and_obj):
+ (obj_type, obj) = objtype_and_obj
+
+ def check_person(p):
+ if p is not None:
+ assert p.name is None
+ assert p.email is None
+ assert len(p.fullname) == 32
+
+ anon_obj = obj.anonymize()
+ if obj_type == "person":
+ assert anon_obj is not None
+ check_person(anon_obj)
+ elif obj_type == "release":
+ assert anon_obj is not None
+ check_person(anon_obj.author)
+ elif obj_type == "revision":
+ assert anon_obj is not None
+ check_person(anon_obj.author)
+ check_person(anon_obj.committer)
+ else:
+ assert anon_obj is None
+
+
+# Origin, OriginVisit
+
+
@given(strategies.origins())
def test_todict_origins(origin):
obj = origin.to_dict()
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 30, 2:51 PM (7 h, 41 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215668
Attached To
D3171: Add support for model object anonymization
Event Timeline
Log In to Comment