Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7437562
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
14 KB
Subscribers
None
View Options
diff --git a/swh/storage/converters.py b/swh/storage/converters.py
index 7bdc4421..541d5aa2 100644
--- a/swh/storage/converters.py
+++ b/swh/storage/converters.py
@@ -1,315 +1,320 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
+from typing import Optional, Dict
+
from swh.core.utils import decode_with_escape, encode_with_unescape
from swh.model import identifiers
from swh.model.hashutil import MultiHash
# Author representation used by author_to_db when no author is given:
# all three fields are None.
DEFAULT_AUTHOR = dict.fromkeys(("fullname", "name", "email"))

# Date representation used by date_to_db when no date is given: no timestamp,
# a zero offset and no negative-UTC flag.
DEFAULT_DATE = dict(timestamp=None, offset=0, neg_utc_offset=None)
def author_to_db(author):
    """Convert a swh-model author to its DB representation.

    Args:
        author: a :mod:`swh.model` compatible author, or None

    Returns:
        dict: a dictionary with three keys: fullname, name and email.
        When ``author`` is None, a copy of ``DEFAULT_AUTHOR`` (all values
        None) is returned; otherwise ``author`` is returned as is.

    """
    if author is None:
        # Return a copy so that a caller mutating the result cannot alter the
        # shared module-level default.
        return dict(DEFAULT_AUTHOR)

    return author
def db_to_author(
    fullname: Optional[bytes], name: Optional[bytes], email: Optional[bytes]
) -> Optional[Dict[str, Optional[bytes]]]:
    """Convert the DB representation of an author to a swh-model author.

    Args:
        fullname (bytes): the author's fullname
        name (bytes): the author's name
        email (bytes): the author's email

    Returns:
        a dictionary with three keys (fullname, name and email), or
        None if all the arguments are None.

    """
    # Only an author with no field set at all is reported as missing; a
    # partially filled author is still returned as a dictionary.
    if fullname is None and name is None and email is None:
        return None

    return {
        "fullname": fullname,
        "name": name,
        "email": email,
    }
def git_headers_to_db(git_headers):
    """Convert git headers to their database representation.

    Header values are bytes; they are turned into unicode by decoding them as
    utf-8, with invalid utf-8 sequences replaced by backslash escapes. A
    header whose value is a list has each of its items converted in turn.

    """
    return [
        [
            name,
            [decode_with_escape(item) for item in payload]
            if isinstance(payload, list)
            else decode_with_escape(payload),
        ]
        for name, payload in git_headers
    ]
def db_to_git_headers(db_git_headers):
    """Convert the database representation of git headers back to git headers.

    Every header value is passed through ``encode_with_unescape``; a header
    whose value is a list has each of its items converted in turn.

    """

    def _encode(payload):
        # A list value is converted item by item, anything else in one go.
        if isinstance(payload, list):
            return [encode_with_unescape(item) for item in payload]
        return encode_with_unescape(payload)

    return [[name, _encode(payload)] for name, payload in db_git_headers]
def db_to_date(date, offset, neg_utc_offset):
    """Convert the DB representation of a date to a swh-model compatible date.

    Args:
        date (datetime.datetime): a date pulled out of the database
        offset (int): an integer number of minutes representing a UTC offset
        neg_utc_offset (boolean): whether a UTC offset is negative

    Returns:
        dict: a dict with three keys (or None if ``date`` is None):

        - timestamp: a timestamp from UTC, as a dict with ``seconds`` and
          ``microseconds`` keys
        - offset: the number of minutes since UTC
        - negative_utc: whether a null UTC offset is negative

    """
    if date is None:
        return None

    # ``date.microsecond`` is always a non-negative fraction, so the seconds
    # part must be the whole second at or below the date. Truncating
    # ``date.timestamp()`` with int() rounds toward zero and is off by one for
    # dates before the epoch with non-zero microseconds, so strip the
    # microseconds before converting.
    seconds = int(date.replace(microsecond=0).timestamp())

    return {
        "timestamp": {
            "seconds": seconds,
            "microseconds": date.microsecond,
        },
        "offset": offset,
        "negative_utc": neg_utc_offset,
    }
def date_to_db(date_offset):
    """Convert a swh-model date_offset to its DB representation.

    Args:
        date_offset: a :mod:`swh.model` compatible date_offset, or None

    Returns:
        dict: a dictionary with three keys:

        - timestamp: a date in ISO format
        - offset: the UTC offset in minutes
        - neg_utc_offset: a boolean indicating whether a null offset is
          negative or positive.

        When ``date_offset`` is None, a copy of ``DEFAULT_DATE`` is returned.

    """
    if date_offset is None:
        # Return a copy so that a caller mutating the result cannot alter the
        # shared module-level default.
        return dict(DEFAULT_DATE)

    normalized = identifiers.normalize_timestamp(date_offset)

    ts = normalized["timestamp"]
    seconds = ts.get("seconds", 0)
    microseconds = ts.get("microseconds", 0)

    # Build the datetime from whole seconds in UTC, then set the sub-second
    # part explicitly.
    timestamp = datetime.datetime.fromtimestamp(
        seconds, datetime.timezone.utc
    ).replace(microsecond=microseconds)

    return {
        # PostgreSQL supports isoformatted timestamps
        "timestamp": timestamp.isoformat(),
        "offset": normalized["offset"],
        "neg_utc_offset": normalized["negative_utc"],
    }
def revision_to_db(rev):
    """Convert a swh-model revision to its database representation.

    ``rev.to_dict()`` is flattened into a single dictionary: the author and
    committer become ``*_fullname`` / ``*_name`` / ``*_email`` entries, both
    dates become ``*`` / ``*_offset`` / ``*_neg_utc_offset`` entries, and each
    parent becomes a dictionary carrying the revision id, the parent id and
    the parent's rank. When the metadata holds ``extra_headers``, a copy of
    the metadata with those headers converted by ``git_headers_to_db`` is
    used instead.

    """
    data = rev.to_dict()

    author_row = author_to_db(data["author"])
    date_row = date_to_db(data["date"])
    committer_row = author_to_db(data["committer"])
    committer_date_row = date_to_db(data["committer_date"])

    meta = data["metadata"]
    if meta and "extra_headers" in meta:
        # Work on a copy so the dictionary produced by to_dict() keeps its
        # original headers.
        meta = meta.copy()
        meta["extra_headers"] = git_headers_to_db(meta["extra_headers"])

    row = {
        "id": data["id"],
        "author_fullname": author_row["fullname"],
        "author_name": author_row["name"],
        "author_email": author_row["email"],
        "date": date_row["timestamp"],
        "date_offset": date_row["offset"],
        "date_neg_utc_offset": date_row["neg_utc_offset"],
        "committer_fullname": committer_row["fullname"],
        "committer_name": committer_row["name"],
        "committer_email": committer_row["email"],
        "committer_date": committer_date_row["timestamp"],
        "committer_date_offset": committer_date_row["offset"],
        "committer_date_neg_utc_offset": committer_date_row["neg_utc_offset"],
        "type": data["type"],
        "directory": data["directory"],
        "message": data["message"],
        "metadata": meta,
        "synthetic": data["synthetic"],
    }

    parent_rows = []
    for rank, parent_id in enumerate(data["parents"]):
        parent_rows.append(
            {"id": data["id"], "parent_id": parent_id, "parent_rank": rank}
        )
    row["parents"] = parent_rows

    return row
def db_to_revision(db_revision):
    """Convert a database representation of a revision to its swh-model
    representation.

    Args:
        db_revision: a mapping holding the revision columns (``id``,
            ``author_*``, ``date*``, ``committer_*``, ``type``,
            ``directory``, ``message``, ``metadata``, ``synthetic``) and,
            optionally, ``parents`` and ``object_id``.

    Returns:
        dict: the revision as a dictionary. ``parents`` only keeps the truthy
        entries of ``db_revision["parents"]`` (empty list when the key is
        absent); ``object_id`` is only present when ``db_revision`` has it.

    """
    author = db_to_author(
        db_revision["author_fullname"],
        db_revision["author_name"],
        db_revision["author_email"],
    )
    date = db_to_date(
        db_revision["date"],
        db_revision["date_offset"],
        db_revision["date_neg_utc_offset"],
    )

    committer = db_to_author(
        db_revision["committer_fullname"],
        db_revision["committer_name"],
        db_revision["committer_email"],
    )
    committer_date = db_to_date(
        db_revision["committer_date"],
        db_revision["committer_date_offset"],
        db_revision["committer_date_neg_utc_offset"],
    )

    metadata = db_revision["metadata"]
    if metadata and "extra_headers" in metadata:
        # Copy before rewriting the headers, as revision_to_db does, so the
        # caller's row is not modified in place.
        metadata = metadata.copy()
        metadata["extra_headers"] = db_to_git_headers(metadata["extra_headers"])

    if "parents" in db_revision:
        # Falsy entries (e.g. None placeholders) are dropped.
        parents = [parent for parent in db_revision["parents"] if parent]
    else:
        parents = []

    ret = {
        "id": db_revision["id"],
        "author": author,
        "date": date,
        "committer": committer,
        "committer_date": committer_date,
        "type": db_revision["type"],
        "directory": db_revision["directory"],
        "message": db_revision["message"],
        "metadata": metadata,
        "synthetic": db_revision["synthetic"],
        "parents": parents,
    }

    if "object_id" in db_revision:
        ret["object_id"] = db_revision["object_id"]

    return ret
def release_to_db(rel):
    """Convert a swh-model release to its database representation.

    ``rel.to_dict()`` is flattened into a single dictionary: the author
    becomes ``author_fullname`` / ``author_name`` / ``author_email``, the
    date becomes ``date`` / ``date_offset`` / ``date_neg_utc_offset``, and the
    release message is stored under ``comment``.

    """
    data = rel.to_dict()

    author_row = author_to_db(data["author"])
    date_row = date_to_db(data["date"])

    row = {"id": data["id"]}
    row["author_fullname"] = author_row["fullname"]
    row["author_name"] = author_row["name"]
    row["author_email"] = author_row["email"]
    row["date"] = date_row["timestamp"]
    row["date_offset"] = date_row["offset"]
    row["date_neg_utc_offset"] = date_row["neg_utc_offset"]
    row["name"] = data["name"]
    row["target"] = data["target"]
    row["target_type"] = data["target_type"]
    row["comment"] = data["message"]
    row["synthetic"] = data["synthetic"]
    return row
def db_to_release(db_release):
    """Convert a database representation of a release to its swh-model
    representation.

    The ``author_*`` columns are grouped by ``db_to_author``, the ``date*``
    columns by ``db_to_date``, and the ``comment`` column is exposed as
    ``message``. ``object_id`` is copied over only when ``db_release`` has it.

    """
    who = db_to_author(
        db_release["author_fullname"],
        db_release["author_name"],
        db_release["author_email"],
    )
    when = db_to_date(
        db_release["date"],
        db_release["date_offset"],
        db_release["date_neg_utc_offset"],
    )

    release = {"author": who, "date": when}
    release["id"] = db_release["id"]
    release["name"] = db_release["name"]
    release["message"] = db_release["comment"]
    release["synthetic"] = db_release["synthetic"]
    release["target"] = db_release["target"]
    release["target_type"] = db_release["target_type"]

    if "object_id" in db_release:
        release["object_id"] = db_release["object_id"]

    return release
def origin_url_to_sha1(origin_url):
    """Convert an origin URL to a sha1. The URL is encoded to utf-8 first."""
    url_bytes = origin_url.encode("utf-8")
    digests = MultiHash.from_data(url_bytes, {"sha1"}).digest()
    return digests["sha1"]
diff --git a/swh/storage/tests/test_converters.py b/swh/storage/tests/test_converters.py
index 72443ec4..83f63e11 100644
--- a/swh/storage/tests/test_converters.py
+++ b/swh/storage/tests/test_converters.py
@@ -1,152 +1,160 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.storage import converters
def test_date_to_db():
    """date_to_db handles None and several timestamp/offset combinations."""
    convert = converters.date_to_db

    # A missing date maps to the default date.
    assert convert(None) == {"timestamp": None, "offset": 0, "neg_utc_offset": None}

    # (input date_offset, expected database representation)
    cases = [
        (
            {"timestamp": 1234567890, "offset": 120, "negative_utc": False},
            {
                "timestamp": "2009-02-13T23:31:30+00:00",
                "offset": 120,
                "neg_utc_offset": False,
            },
        ),
        (
            {"timestamp": 1123456789, "offset": 0, "negative_utc": True},
            {
                "timestamp": "2005-08-07T23:19:49+00:00",
                "offset": 0,
                "neg_utc_offset": True,
            },
        ),
        (
            {"timestamp": 1234567890, "offset": 42, "negative_utc": False},
            {
                "timestamp": "2009-02-13T23:31:30+00:00",
                "offset": 42,
                "neg_utc_offset": False,
            },
        ),
        (
            {"timestamp": 1634366813, "offset": -120, "negative_utc": False},
            {
                "timestamp": "2021-10-16T06:46:53+00:00",
                "offset": -120,
                "neg_utc_offset": False,
            },
        ),
    ]
    for date_offset, expected in cases:
        assert convert(date_offset) == expected
def test_db_to_author():
    """db_to_author groups its three arguments into an author dictionary."""
    expected = {
        "fullname": b"fullname",
        "name": b"name",
        "email": b"email",
    }

    result = converters.db_to_author(b"fullname", b"name", b"email")

    assert result == expected
def test_db_to_author_none():
    """db_to_author returns None when every author field is None."""
    # when
    actual_author = converters.db_to_author(None, None, None)

    # then
    assert actual_author is None


def test_db_to_revision():
    """db_to_revision regroups author/committer columns and keeps parents."""
    # given: a database row without dates
    db_row = {
        "id": "revision-id",
        "date": None,
        "date_offset": None,
        "date_neg_utc_offset": None,
        "committer_date": None,
        "committer_date_offset": None,
        "committer_date_neg_utc_offset": None,
        "type": "rev",
        "directory": b"dir-sha1",
        "message": b"commit message",
        "author_fullname": b"auth-fullname",
        "author_name": b"auth-name",
        "author_email": b"auth-email",
        "committer_fullname": b"comm-fullname",
        "committer_name": b"comm-name",
        "committer_email": b"comm-email",
        "metadata": {},
        "synthetic": False,
        "parents": [123, 456],
    }
    expected = {
        "id": "revision-id",
        "author": {
            "fullname": b"auth-fullname",
            "name": b"auth-name",
            "email": b"auth-email",
        },
        "date": None,
        "committer": {
            "fullname": b"comm-fullname",
            "name": b"comm-name",
            "email": b"comm-email",
        },
        "committer_date": None,
        "type": "rev",
        "directory": b"dir-sha1",
        "message": b"commit message",
        "metadata": {},
        "synthetic": False,
        "parents": [123, 456],
    }

    # when
    result = converters.db_to_revision(db_row)

    # then
    assert result == expected
def test_db_to_release():
    """db_to_release regroups author columns and renames comment to message."""
    # given: a database row without a date
    db_row = {
        "id": b"release-id",
        "target": b"revision-id",
        "target_type": "revision",
        "date": None,
        "date_offset": None,
        "date_neg_utc_offset": None,
        "name": b"release-name",
        "comment": b"release comment",
        "synthetic": True,
        "author_fullname": b"auth-fullname",
        "author_name": b"auth-name",
        "author_email": b"auth-email",
    }
    expected = {
        "author": {
            "fullname": b"auth-fullname",
            "name": b"auth-name",
            "email": b"auth-email",
        },
        "date": None,
        "id": b"release-id",
        "name": b"release-name",
        "message": b"release comment",
        "synthetic": True,
        "target": b"revision-id",
        "target_type": "revision",
    }

    # when
    result = converters.db_to_release(db_row)

    # then
    assert result == expected
def test_db_to_git_headers():
    """git_headers_to_db followed by db_to_git_headers is a round trip."""
    # Headers with invalid utf-8 bytes and literal backslashes, both as a
    # single value and as a list of values.
    original = [
        ["gpgsig", b"garbage\x89a\x43b\x14"],
        ["extra", [b"foo\\\\\\o", b"bar\\", b"inval\\\\\x99id"]],
    ]

    stored = converters.git_headers_to_db(original)
    restored = converters.db_to_git_headers(stored)

    assert original == restored
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Apr 15, 12:22 AM (1 w, 6 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3285935
Attached To
rDSTO Storage manager
Event Timeline
Log In to Comment