Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/tar/build.py
# Copyright (C) 2015-2018 The Software Heritage developers | # Copyright (C) 2015-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import arrow | ||||
from swh.core import utils | |||||
# Static setup
EPOCH = 0
# Offset attached to the 'date' and 'committer_date' of built revisions.
UTC_OFFSET = 0
# Identity used as both 'author' and 'committer' of built revisions.
SWH_PERSON = {
    'name': 'Software Heritage',
    'fullname': 'Software Heritage',
    'email': 'robot@softwareheritage.org'
}
REVISION_MESSAGE = 'swh-loader-tar: synthetic revision message'
REVISION_TYPE = 'tar'
def _time_from_last_modified(last_modified):
    """Compute the modification timestamp from a last-modified date.

    Args:
        last_modified (str): Last modification time, in any form
            accepted by arrow.get

    Returns:
        dict representing a timestamp with keys {seconds, microseconds}

    """
    moment = arrow.get(last_modified).datetime
    # Take both parts from the parsed datetime. Splitting
    # str(float_timestamp) on '.' misreads the fraction: '.05' would be
    # stored as 5 microseconds instead of 50000, and a float repr may
    # carry more than six fractional digits.
    whole_seconds = moment.replace(microsecond=0).timestamp()
    return {
        'seconds': int(whole_seconds),
        'microseconds': moment.microsecond,
    }
def compute_revision(tarpath, last_modified):
    """Compute a synthetic revision.

    Args:
        tarpath (str): absolute path to the tarball (not read when
            building the revision)
        last_modified (str): Time of last modification read from the
            source remote (most probably by the lister)

    Returns:
        Revision as dict:
        - date (dict): the modification timestamp as returned by
          _time_from_last_modified function, with offset UTC_OFFSET
        - committer_date (dict): same timestamp and offset as date
        - author: cf. SWH_PERSON
        - committer: cf. SWH_PERSON
        - type: cf. REVISION_TYPE
        - message: cf. REVISION_MESSAGE
        - synthetic: always True

    """
    ts = _time_from_last_modified(last_modified)
    return {
        # Each date gets its own timestamp dict so that mutating one
        # entry of the result cannot silently change the other.
        'date': {
            'timestamp': dict(ts),
            'offset': UTC_OFFSET,
        },
        'committer_date': {
            'timestamp': dict(ts),
            'offset': UTC_OFFSET,
        },
        'author': SWH_PERSON,
        'committer': SWH_PERSON,
        'type': REVISION_TYPE,
        'message': REVISION_MESSAGE,
        'synthetic': True,
    }