Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/api/private/deposit_read.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import os | import os | ||||
import shutil | import shutil | ||||
import tempfile | import tempfile | ||||
from contextlib import contextmanager | from contextlib import contextmanager | ||||
from dateutil import parser | |||||
from django.http import FileResponse | from django.http import FileResponse | ||||
from rest_framework import status | from rest_framework import status | ||||
from swh.core import tarball | from swh.core import tarball | ||||
from swh.model import identifiers | from swh.model import identifiers | ||||
from . import DepositReadMixin | from . import DepositReadMixin | ||||
from ...config import SWH_PERSON, ARCHIVE_TYPE | from ...config import SWH_PERSON, ARCHIVE_TYPE | ||||
▲ Show 20 Lines • Show All 111 Lines • ▼ Show 20 Lines | """ | ||||
def _retrieve_url(self, deposit, metadata):
    """Find the metadata url that belongs to the deposit's client.

    Metadata entries are scanned in iteration order; the first entry
    whose key contains 'url' and whose value contains the client's
    domain is selected.

    Args:
        deposit (Deposit): Deposit whose client domain is looked for
        metadata (Dict): Metadata dict representation

    Returns:
        The value of the first matching entry, None when no entry
        matches.

    """
    domain = deposit.client.domain
    for key, value in metadata.items():
        if 'url' in key and domain in value:
            return value
    return None
def _prepare_date(self, date):
    """Convert a raw date value into a normalized swh date.

    When ``date`` is a list, its first element is arbitrarily
    selected. When the (selected) value is a string, it is parsed
    with dateutil.parser.parse to obtain a datetime. The outcome is
    finally handed to swh.model.identifiers.normalize_timestamp.

    Args:
        date: Date to prepare, possibly wrapped in a list and
            possibly expressed as a string.

    Returns:
        The swh date object

    """
    candidate = date[0] if isinstance(date, list) else date
    if isinstance(candidate, str):
        candidate = parser.parse(candidate)
    return identifiers.normalize_timestamp(candidate)
def _normalize_dates(self, deposit, metadata):
    """Compute the (author date, committer date) pair for a deposit.

    The committer date is read from the 'codemeta:datePublished'
    metadata entry and the author date from 'codemeta:dateCreated'.
    When only one of the two is set, it is used for both dates.
    When neither is set, both dates fall back to the deposit's
    complete_date.

    Args:
        deposit (Deposit): Deposit model representation
        metadata (Dict): Metadata dict representation

    Returns:
        Tuple of author date, committer date, each one prepared
        through self._prepare_date.

    """
    published = metadata.get('codemeta:datePublished')
    created = metadata.get('codemeta:dateCreated')
    # `or` chains pick the first truthy candidate, so a missing (or
    # empty) entry falls through to the next one.
    author_date = created or published or deposit.complete_date
    commit_date = published or created or deposit.complete_date
    return self._prepare_date(author_date), self._prepare_date(commit_date)
def metadata_read(self, deposit): | def metadata_read(self, deposit): | ||||
"""Read and aggregate multiple data on deposit into one unified data | """Read and aggregate multiple data on deposit into one unified data | ||||
dictionary. | dictionary. | ||||
Args: | Args: | ||||
deposit (Deposit): Deposit concerned by the data aggregation. | deposit (Deposit): Deposit concerned by the data aggregation. | ||||
Returns: | Returns: | ||||
Show All 17 Lines | def metadata_read(self, deposit): | ||||
# metadata provider | # metadata provider | ||||
self.provider['provider_name'] = deposit.client.last_name | self.provider['provider_name'] = deposit.client.last_name | ||||
self.provider['provider_url'] = deposit.client.provider_url | self.provider['provider_url'] = deposit.client.provider_url | ||||
revision_type = 'tar' | revision_type = 'tar' | ||||
revision_msg = '%s: Deposit %s in collection %s' % ( | revision_msg = '%s: Deposit %s in collection %s' % ( | ||||
fullname, deposit.id, deposit.collection.name) | fullname, deposit.id, deposit.collection.name) | ||||
complete_date = identifiers.normalize_timestamp(deposit.complete_date) | |||||
author_date, commit_date = self._normalize_dates(deposit, metadata) | |||||
data['revision'] = { | data['revision'] = { | ||||
'synthetic': True, | 'synthetic': True, | ||||
'date': complete_date, | 'date': author_date, | ||||
'committer_date': complete_date, | 'committer_date': commit_date, | ||||
# Inline review comment (ardumont, marked done): According to our discussion anyway, it's no longer the commit date, it's the author date.
'author': author_committer, | 'author': author_committer, | ||||
'committer': author_committer, | 'committer': author_committer, | ||||
'type': revision_type, | 'type': revision_type, | ||||
'message': revision_msg, | 'message': revision_msg, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
} | } | ||||
if deposit.parent: | if deposit.parent: | ||||
Show All 24 Lines |
Inline review comment (douardda): Nitpicking a bit, but IMHO the `t` temp var is not needed here. The whole if/elif block can manipulate the {author,commit}_date variables directly.