Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/storage/converters.py b/swh/storage/converters.py
index 7bdc4421..541d5aa2 100644
--- a/swh/storage/converters.py
+++ b/swh/storage/converters.py
@@ -1,315 +1,320 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
+from typing import Optional, Dict
+
from swh.core.utils import decode_with_escape, encode_with_unescape
from swh.model import identifiers
from swh.model.hashutil import MultiHash
# Placeholder handed back by author_to_db when no author is given:
# every field is None.
DEFAULT_AUTHOR = dict.fromkeys(("fullname", "name", "email"))
# Placeholder handed back by date_to_db when no date is given.
DEFAULT_DATE = dict(timestamp=None, offset=0, neg_utc_offset=None)
def author_to_db(author):
    """Convert a swh-model author to its DB representation.

    Args:
        author: a :mod:`swh.model` compatible author, or None

    Returns:
        dict: ``author`` itself when it is not None; otherwise a new
        dictionary with three keys (fullname, name and email), all set
        to None.

    """
    if author is None:
        # Hand out a fresh copy: returning the module-level default itself
        # would let one caller's mutation leak into every later call.
        return dict(DEFAULT_AUTHOR)

    return author
def db_to_author(
    fullname: Optional[bytes], name: Optional[bytes], email: Optional[bytes]
) -> Optional[Dict[str, Optional[bytes]]]:
    """Convert the DB representation of an author to a swh-model author.

    Args:
        fullname (bytes): the author's fullname
        name (bytes): the author's name
        email (bytes): the author's email

    Returns:
        a dictionary with three keys (fullname, name and email), or
        None if all the arguments are None.

    """
    # Only a row where every author column is None means "no author";
    # a partially filled row is still returned as a dictionary.
    if fullname is None and name is None and email is None:
        return None

    return {
        "fullname": fullname,
        "name": name,
        "email": email,
    }
def git_headers_to_db(git_headers):
    """Convert git headers to their database representation.

    Each header value (or each item of a list of values) goes through
    ``decode_with_escape``: the bytes are decoded as utf-8, and invalid
    utf-8 sequences are replaced with backslash escapes.

    Args:
        git_headers: an iterable of (key, value) pairs, where value is
            either a single value or a list of values

    Returns:
        list: a list of ``[key, converted value(s)]`` pairs, in input order

    """

    def _to_db(raw):
        # A header may carry several values; convert them one by one.
        if isinstance(raw, list):
            return [decode_with_escape(item) for item in raw]
        return decode_with_escape(raw)

    return [[name, _to_db(raw)] for name, raw in git_headers]
def db_to_git_headers(db_git_headers):
    """Convert git headers from their database representation.

    This is the counterpart of :func:`git_headers_to_db`: each header value
    (or each item of a list of values) goes through ``encode_with_unescape``.

    Args:
        db_git_headers: an iterable of (key, value) pairs, where value is
            either a single value or a list of values

    Returns:
        list: a list of ``[key, converted value(s)]`` pairs, in input order

    """

    def _from_db(stored):
        # A header may carry several values; convert them one by one.
        if isinstance(stored, list):
            return [encode_with_unescape(item) for item in stored]
        return encode_with_unescape(stored)

    return [[name, _from_db(stored)] for name, stored in db_git_headers]
def db_to_date(date, offset, neg_utc_offset):
    """Convert the DB representation of a date to a swh-model compatible date.

    Args:
        date (datetime.datetime): a date pulled out of the database
        offset (int): an integer number of minutes representing an UTC offset
        neg_utc_offset (boolean): whether an utc offset is negative

    Returns:
        dict: None if ``date`` is None, otherwise a dict with three keys:

        - timestamp: a dict with the whole ``seconds`` since the epoch
          (rounded down) and the remaining ``microseconds``
        - offset: the UTC offset in minutes, as passed in
        - negative_utc: whether a null UTC offset is negative

    """
    if date is None:
        return None

    # Zero the microseconds before converting, so the seconds are rounded
    # down rather than truncated toward zero. Truncation would put a
    # pre-epoch date such as 1969-12-31T23:59:59.5 at second 0 instead of
    # -1, and the (seconds, microseconds) pair would then denote a
    # different instant.
    whole_seconds = int(date.replace(microsecond=0).timestamp())

    return {
        "timestamp": {
            "seconds": whole_seconds,
            "microseconds": date.microsecond,
        },
        "offset": offset,
        "negative_utc": neg_utc_offset,
    }
def date_to_db(date_offset):
    """Convert a swh-model date_offset to its DB representation.

    Args:
        date_offset: a :mod:`swh.model` compatible date_offset, or None

    Returns:
        dict: a dictionary with three keys:

        - timestamp: a date in ISO format (None when ``date_offset`` is
          None)
        - offset: the UTC offset in minutes
        - neg_utc_offset: a boolean indicating whether a null offset is
          negative or positive.

    """
    if date_offset is None:
        # Hand out a fresh copy: returning the module-level default itself
        # would let one caller's mutation leak into every later call.
        return dict(DEFAULT_DATE)

    normalized = identifiers.normalize_timestamp(date_offset)

    ts = normalized["timestamp"]
    seconds = ts.get("seconds", 0)
    microseconds = ts.get("microseconds", 0)

    # Build the datetime from the whole seconds, then attach the
    # microseconds separately.
    timestamp = datetime.datetime.fromtimestamp(seconds, datetime.timezone.utc)
    timestamp = timestamp.replace(microsecond=microseconds)

    return {
        # PostgreSQL supports isoformatted timestamps
        "timestamp": timestamp.isoformat(),
        "offset": normalized["offset"],
        "neg_utc_offset": normalized["negative_utc"],
    }
def revision_to_db(rev):
    """Convert a swh-model revision to its database representation.

    Args:
        rev: an object whose ``to_dict()`` returns the revision as a
            dictionary

    Returns:
        dict: a flat dictionary with the author and committer identities
        and dates spread over prefixed keys, plus a ``parents`` list with
        one ``{id, parent_id, parent_rank}`` entry per parent.

    """
    data = rev.to_dict()

    author_row = author_to_db(data["author"])
    date_row = date_to_db(data["date"])
    committer_row = author_to_db(data["committer"])
    committer_date_row = date_to_db(data["committer_date"])

    db_metadata = data["metadata"]
    if db_metadata and "extra_headers" in db_metadata:
        # Convert the headers on a copy; the dictionary obtained from
        # to_dict() is left as it was.
        db_metadata = db_metadata.copy()
        db_metadata["extra_headers"] = git_headers_to_db(
            db_metadata["extra_headers"]
        )

    # One entry per parent, ranked by its position in the parents list.
    parent_rows = []
    for rank, parent_id in enumerate(data["parents"]):
        parent_rows.append(
            {"id": data["id"], "parent_id": parent_id, "parent_rank": rank}
        )

    return {
        "id": data["id"],
        "author_fullname": author_row["fullname"],
        "author_name": author_row["name"],
        "author_email": author_row["email"],
        "date": date_row["timestamp"],
        "date_offset": date_row["offset"],
        "date_neg_utc_offset": date_row["neg_utc_offset"],
        "committer_fullname": committer_row["fullname"],
        "committer_name": committer_row["name"],
        "committer_email": committer_row["email"],
        "committer_date": committer_date_row["timestamp"],
        "committer_date_offset": committer_date_row["offset"],
        "committer_date_neg_utc_offset": committer_date_row["neg_utc_offset"],
        "type": data["type"],
        "directory": data["directory"],
        "message": data["message"],
        "metadata": db_metadata,
        "synthetic": data["synthetic"],
        "parents": parent_rows,
    }
def db_to_revision(db_revision):
    """Convert a database representation of a revision to its swh-model
    representation.

    Args:
        db_revision: a mapping with the revision columns (id, the
            author_*/committer_* identity columns, the date columns,
            type, directory, message, metadata, synthetic, and
            optionally parents and object_id)

    Returns:
        dict: the revision as a dictionary with nested ``author``,
        ``committer``, ``date`` and ``committer_date`` values. The
        ``object_id`` key is only present when the input has it.

    """
    author = db_to_author(
        db_revision["author_fullname"],
        db_revision["author_name"],
        db_revision["author_email"],
    )
    date = db_to_date(
        db_revision["date"],
        db_revision["date_offset"],
        db_revision["date_neg_utc_offset"],
    )

    committer = db_to_author(
        db_revision["committer_fullname"],
        db_revision["committer_name"],
        db_revision["committer_email"],
    )
    committer_date = db_to_date(
        db_revision["committer_date"],
        db_revision["committer_date_offset"],
        db_revision["committer_date_neg_utc_offset"],
    )

    metadata = db_revision["metadata"]
    if metadata and "extra_headers" in metadata:
        # Convert on a copy, as revision_to_db does, so the metadata held
        # by the caller's row is not rewritten in place.
        metadata = metadata.copy()
        metadata["extra_headers"] = db_to_git_headers(metadata["extra_headers"])

    # Keep only truthy parent entries; a row without a "parents" column
    # yields an empty list.
    parents = []
    if "parents" in db_revision:
        parents = [parent for parent in db_revision["parents"] if parent]

    ret = {
        "id": db_revision["id"],
        "author": author,
        "date": date,
        "committer": committer,
        "committer_date": committer_date,
        "type": db_revision["type"],
        "directory": db_revision["directory"],
        "message": db_revision["message"],
        "metadata": metadata,
        "synthetic": db_revision["synthetic"],
        "parents": parents,
    }

    if "object_id" in db_revision:
        ret["object_id"] = db_revision["object_id"]

    return ret
def release_to_db(rel):
    """Convert a swh-model release to its database representation.

    Args:
        rel: an object whose ``to_dict()`` returns the release as a
            dictionary

    Returns:
        dict: a flat dictionary with the author identity and date spread
        over prefixed keys; the release message is stored under
        ``comment``.

    """
    data = rel.to_dict()

    author_row = author_to_db(data["author"])
    date_row = date_to_db(data["date"])

    row = {}
    row["id"] = data["id"]
    row["author_fullname"] = author_row["fullname"]
    row["author_name"] = author_row["name"]
    row["author_email"] = author_row["email"]
    row["date"] = date_row["timestamp"]
    row["date_offset"] = date_row["offset"]
    row["date_neg_utc_offset"] = date_row["neg_utc_offset"]
    row["name"] = data["name"]
    row["target"] = data["target"]
    row["target_type"] = data["target_type"]
    row["comment"] = data["message"]
    row["synthetic"] = data["synthetic"]
    return row
def db_to_release(db_release):
    """Convert a database representation of a release to its swh-model
    representation.

    Args:
        db_release: a mapping with the release columns (id, the author_*
            identity columns, the date columns, name, comment, synthetic,
            target, target_type, and optionally object_id)

    Returns:
        dict: the release as a dictionary with nested ``author`` and
        ``date`` values; the ``comment`` column becomes ``message``. The
        ``object_id`` key is only present when the input has it.

    """
    release = {
        "author": db_to_author(
            db_release["author_fullname"],
            db_release["author_name"],
            db_release["author_email"],
        ),
        "date": db_to_date(
            db_release["date"],
            db_release["date_offset"],
            db_release["date_neg_utc_offset"],
        ),
        "id": db_release["id"],
        "name": db_release["name"],
        "message": db_release["comment"],
        "synthetic": db_release["synthetic"],
        "target": db_release["target"],
        "target_type": db_release["target_type"],
    }

    if "object_id" in db_release:
        release["object_id"] = db_release["object_id"]

    return release
def origin_url_to_sha1(origin_url):
    """Convert an origin URL to a sha1. Encodes URL to utf-8.

    Args:
        origin_url (str): the origin URL

    Returns:
        the ``sha1`` entry of the MultiHash digest of the encoded URL

    """
    url_bytes = origin_url.encode("utf-8")
    hasher = MultiHash.from_data(url_bytes, {"sha1"})
    return hasher.digest()["sha1"]
diff --git a/swh/storage/tests/test_converters.py b/swh/storage/tests/test_converters.py
index 72443ec4..83f63e11 100644
--- a/swh/storage/tests/test_converters.py
+++ b/swh/storage/tests/test_converters.py
@@ -1,152 +1,160 @@
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.storage import converters
def test_date_to_db():
    """Check date_to_db on a missing date and on several timestamps."""
    convert = converters.date_to_db

    assert convert(None) == {"timestamp": None, "offset": 0, "neg_utc_offset": None}

    # (epoch seconds, offset, negative_utc, expected ISO timestamp)
    cases = [
        (1234567890, 120, False, "2009-02-13T23:31:30+00:00"),
        (1123456789, 0, True, "2005-08-07T23:19:49+00:00"),
        (1234567890, 42, False, "2009-02-13T23:31:30+00:00"),
        (1634366813, -120, False, "2021-10-16T06:46:53+00:00"),
    ]

    for seconds, offset, negative_utc, expected_iso in cases:
        model_date = {
            "timestamp": seconds,
            "offset": offset,
            "negative_utc": negative_utc,
        }
        assert convert(model_date) == {
            "timestamp": expected_iso,
            "offset": offset,
            "neg_utc_offset": negative_utc,
        }
def test_db_to_author():
    """db_to_author returns the three columns as a dictionary."""
    expected = {
        "fullname": b"fullname",
        "name": b"name",
        "email": b"email",
    }

    result = converters.db_to_author(b"fullname", b"name", b"email")

    assert result == expected
def test_db_to_author_none():
    """db_to_author returns None when every column is None."""
    result = converters.db_to_author(None, None, None)

    assert result is None
def test_db_to_revision():
    """db_to_revision nests author/committer and passes other columns through."""
    # given: a revision row without dates
    db_row = {
        "id": "revision-id",
        "date": None,
        "date_offset": None,
        "date_neg_utc_offset": None,
        "committer_date": None,
        "committer_date_offset": None,
        "committer_date_neg_utc_offset": None,
        "type": "rev",
        "directory": b"dir-sha1",
        "message": b"commit message",
        "author_fullname": b"auth-fullname",
        "author_name": b"auth-name",
        "author_email": b"auth-email",
        "committer_fullname": b"comm-fullname",
        "committer_name": b"comm-name",
        "committer_email": b"comm-email",
        "metadata": {},
        "synthetic": False,
        "parents": [123, 456],
    }
    expected = {
        "id": "revision-id",
        "author": {
            "fullname": b"auth-fullname",
            "name": b"auth-name",
            "email": b"auth-email",
        },
        "date": None,
        "committer": {
            "fullname": b"comm-fullname",
            "name": b"comm-name",
            "email": b"comm-email",
        },
        "committer_date": None,
        "type": "rev",
        "directory": b"dir-sha1",
        "message": b"commit message",
        "metadata": {},
        "synthetic": False,
        "parents": [123, 456],
    }

    # when
    result = converters.db_to_revision(db_row)

    # then
    assert result == expected
def test_db_to_release():
    """db_to_release nests the author and renames comment to message."""
    # given: a release row without a date
    db_row = {
        "id": b"release-id",
        "target": b"revision-id",
        "target_type": "revision",
        "date": None,
        "date_offset": None,
        "date_neg_utc_offset": None,
        "name": b"release-name",
        "comment": b"release comment",
        "synthetic": True,
        "author_fullname": b"auth-fullname",
        "author_name": b"auth-name",
        "author_email": b"auth-email",
    }
    expected = {
        "author": {
            "fullname": b"auth-fullname",
            "name": b"auth-name",
            "email": b"auth-email",
        },
        "date": None,
        "id": b"release-id",
        "name": b"release-name",
        "message": b"release comment",
        "synthetic": True,
        "target": b"revision-id",
        "target_type": "revision",
    }

    # when
    result = converters.db_to_release(db_row)

    # then
    assert result == expected
def test_db_to_git_headers():
    """Headers survive a git_headers_to_db / db_to_git_headers round trip."""
    # Values mix invalid utf-8 bytes with literal backslashes.
    original = [
        ["gpgsig", b"garbage\x89a\x43b\x14"],
        ["extra", [b"foo\\\\\\o", b"bar\\", b"inval\\\\\x99id"]],
    ]

    stored = converters.git_headers_to_db(original)
    restored = converters.db_to_git_headers(stored)

    assert original == restored

File Metadata

Mime Type
text/x-diff
Expires
Tue, Apr 15, 12:22 AM (1 w, 6 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3285935

Event Timeline