Page Menu | Home | Software Heritage

D2733.id9757.diff
No One | Temporary

D2733.id9757.diff

diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -13,3 +13,6 @@
[mypy-pytest.*]
ignore_missing_imports = True
+
+[mypy-swh.loader.package.*]
+ignore_missing_imports = True
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
swh.storage >= 0.0.163
swh.model >= 0.0.59
swh.scheduler >= 0.0.39
-swh.loader.core >= 0.0.78
+swh.loader.core >= 0.0.80
diff --git a/swh/loader/svn/converters.py b/swh/loader/svn/converters.py
--- a/swh/loader/svn/converters.py
+++ b/swh/loader/svn/converters.py
@@ -3,12 +3,20 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Any, Dict, List, Optional, Union
+
from email import utils
+from swh.model.model import (
+ Person, Revision, RevisionType, TimestampWithTimezone
+)
+from swh.loader.package.utils import EMPTY_AUTHOR
+
from .utils import strdate_to_timestamp
-def svn_date_to_swh_date(strdate):
+def svn_date_to_swh_date(
+ strdate: Optional[str]) -> TimestampWithTimezone:
"""Convert a string date to an swh one.
Args:
@@ -19,46 +27,44 @@
An swh date format
"""
- return {
- 'timestamp': strdate_to_timestamp(strdate),
- 'offset': 0
- }
+ return TimestampWithTimezone(
+ timestamp=strdate_to_timestamp(strdate),
+ offset=0,
+ negative_utc=False,
+ )
-def svn_author_to_swh_person(author):
+def svn_author_to_swh_person(author: Union[str, bytes]) -> Person:
"""Convert an svn author to an swh person.
Default policy: No information is added.
Args:
author (string): the svn author (in bytes)
- Returns: a dictionary with keys:
- fullname: the author's associated fullname
- name: the author's associated name
- email: None (no email in svn)
+ Returns:
+ a Person
"""
if not author:
- return {'fullname': b'', 'name': None, 'email': None}
+ return EMPTY_AUTHOR
if isinstance(author, str):
author = author.encode('utf-8')
if b'<' in author and b'>' in author:
name, email = utils.parseaddr(author.decode('utf-8'))
- return {
- 'fullname': author,
- 'name': name.encode('utf-8'),
- 'email': email.encode('utf-8')
- }
+ return Person(
+ fullname=author,
+ name=name.encode('utf-8'),
+ email=email.encode('utf-8')
+ )
- return {'fullname': author, 'email': None, 'name': author}
+ return Person(fullname=author, name=author, email=None)
- }
-
-
-def build_swh_revision(rev, commit, repo_uuid, dir_id, parents):
+def build_swh_revision(
+ rev: int, commit: Dict, repo_uuid: str, dir_id: bytes,
+ parents: List[bytes]) -> Revision:
"""Given a svn revision, build a swh revision.
This adds an ['metadata']['extra-headers'] entry with the
@@ -79,22 +85,22 @@
msg = commit['message']
date = commit['author_date']
- metadata = {
+ metadata: Dict[str, Any] = {
'extra_headers': [
['svn_repo_uuid', repo_uuid],
['svn_revision', str(rev).encode('utf-8')]
]
}
- return {
- 'date': date,
- 'committer_date': date,
- 'type': 'svn',
- 'directory': dir_id,
- 'message': msg,
- 'author': author,
- 'committer': author,
- 'synthetic': True,
- 'metadata': metadata,
- 'parents': parents,
- }
+ return Revision(
+ type=RevisionType.SUBVERSION,
+ date=date,
+ committer_date=date,
+ directory=dir_id,
+ message=msg,
+ author=author,
+ committer=author,
+ synthetic=True,
+ metadata=metadata,
+ parents=parents,
+ )
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -15,12 +15,14 @@
from mmap import mmap, ACCESS_WRITE
from subprocess import Popen
+from typing import Iterator, List, Tuple
from swh.model import hashutil
-from swh.model.from_disk import Directory
-from swh.model.identifiers import identifier_to_bytes, revision_identifier
-from swh.model.identifiers import snapshot_identifier
-from swh.loader.core.converters import prepare_contents
+from swh.model.model import (
+ Content, Directory, Origin, SkippedContent, Revision, Snapshot,
+ SnapshotBranch, TargetType
+)
+from swh.model import from_disk
from swh.loader.core.loader import BaseLoader
from swh.loader.core.utils import clean_dangling_folders
from swh.storage.algos.snapshot import snapshot_get_all_branches
@@ -37,23 +39,14 @@
DEFAULT_BRANCH = b'HEAD'
-def _revision_id(revision):
- return identifier_to_bytes(revision_identifier(revision))
-
-
def build_swh_snapshot(revision_id, branch=DEFAULT_BRANCH):
"""Build a swh snapshot from the revision id, origin url, and visit.
"""
- return {
- 'id': None,
- 'branches': {
- branch: {
- 'target': revision_id,
- 'target_type': 'revision',
- }
- }
- }
+ return Snapshot(branches={
+ branch: SnapshotBranch(
+ target=revision_id, target_type=TargetType.REVISION)
+ })
TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.svn.'
@@ -100,6 +93,7 @@
self.check_revision = None
# internal state used to store swh objects
self._contents = []
+ self._skipped_contents = []
self._directories = []
self._revisions = []
self._snapshot = None
@@ -145,7 +139,7 @@
"""
local_dirname, local_url = self.svnrepo.export_temporary(revision)
- h = Directory.from_disk(path=local_url).hash
+ h = from_disk.Directory.from_disk(path=local_url).hash
self.svnrepo.clean_fs(local_dirname)
return h
@@ -237,12 +231,9 @@
rev, _, commit, _, root_dir = list(hash_data_per_revs)[0]
dir_id = root_dir.hash
- swh_revision = self.build_swh_revision(rev,
- commit,
- dir_id,
- parents)
- swh_revision_id = _revision_id(swh_revision)
-
+ swh_revision = self.build_swh_revision(
+ rev, commit, dir_id, parents)
+ swh_revision_id = swh_revision.id
return swh_revision_id == revision_id
def _init_from(self, partial_swh_revision, previous_swh_revision):
@@ -388,8 +379,14 @@
hashutil.hash_to_hex(checked_dir_id))
raise ValueError(err)
- def process_svn_revisions(self, svnrepo, revision_start, revision_end,
- revision_parents):
+ def process_svn_revisions(
+ self, svnrepo, revision_start, revision_end,
+ revision_parents) -> Iterator[
+ Tuple[
+ List[Content], List[SkippedContent], List[Directory],
+ Revision
+ ]
+ ]:
"""Process svn revisions from revision_start to revision_end.
At each svn revision, apply new diffs and simultaneously
@@ -416,33 +413,30 @@
for rev, nextrev, commit, new_objects, root_directory in gen_revs:
count += 1
# Send the associated contents/directories
- _contents = new_objects.get('content', {}).values()
- _directories = new_objects.get('directory', {}).values()
+ _contents, _skipped_contents, _directories = new_objects
# compute the fs tree's checksums
dir_id = root_directory.hash
swh_revision = self.build_swh_revision(
rev, commit, dir_id, revision_parents[rev])
- swh_revision['id'] = _revision_id(swh_revision)
-
self.log.debug('rev: %s, swhrev: %s, dir: %s' % (
rev,
- hashutil.hash_to_hex(swh_revision['id']),
+ hashutil.hash_to_hex(swh_revision.id),
hashutil.hash_to_hex(dir_id)))
if self.check_revision:
self._check_revision_divergence(count, rev, dir_id)
if nextrev:
- revision_parents[nextrev] = [swh_revision['id']]
+ revision_parents[nextrev] = [swh_revision.id]
- yield _contents, _directories, swh_revision
+ yield _contents, _skipped_contents, _directories, swh_revision
def prepare_origin_visit(self, *args, **kwargs):
- self.origin = {
- 'url': self.origin_url if self.origin_url else self.svn_url,
- }
+ self.origin = Origin(
+ url=self.origin_url if self.origin_url else self.svn_url
+ )
def prepare(self, *args, **kwargs):
if self.swh_revision:
@@ -514,10 +508,11 @@
self.done = True
self._visit_status = 'partial'
return False # Stopping iteration
- self._contents, self._directories, revision = data
- if revision:
+ self._contents, self._skipped_contents, self._directories, rev = data
+ if rev:
+ revision = rev
self._last_revision = revision
- self._revisions.append(revision)
+ self._revisions.append(revision)
return True # next svn revision
def store_data(self):
@@ -528,11 +523,8 @@
This also resets the internal instance variable state.
"""
- contents, skipped_contents = prepare_contents(
- self._contents, max_content_size=self.max_content_size,
- origin_url=self.origin['url'])
- self.storage.skipped_content_add(skipped_contents)
- self.storage.content_add(contents)
+ self.storage.skipped_content_add(self._skipped_contents)
+ self.storage.content_add(self._contents)
self.storage.directory_add(self._directories)
self.storage.revision_add(self._revisions)
@@ -542,8 +534,8 @@
snapshot=self._snapshot
)
self.flush()
- self.storage.origin_visit_update(self.origin['url'], self.visit,
- snapshot=snapshot['id'])
+ self.storage.origin_visit_update(
+ self.origin.url, self.visit, snapshot=snapshot.id)
self._contents = []
self._directories = []
@@ -564,8 +556,7 @@
"""
if revision: # Priority to the revision
- snap = build_swh_snapshot(revision['id'])
- snap['id'] = identifier_to_bytes(snapshot_identifier(snap))
+ snap = build_swh_snapshot(revision.id)
elif snapshot: # Fallback to prior snapshot
snap = snapshot
else:
diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py
--- a/swh/loader/svn/ra.py
+++ b/swh/loader/svn/ra.py
@@ -13,11 +13,14 @@
import shutil
import tempfile
+from typing import List, Tuple
+
from subvertpy import delta, properties
from subvertpy.ra import RemoteAccess, Auth, get_username_provider
from swh.model import hashutil
-from swh.model.from_disk import Content, Directory
+from swh.model import from_disk
+from swh.model.model import Content, Directory, SkippedContent
_eol_style = {
@@ -245,10 +248,11 @@
data = f.read()
data = _normalize_line_endings(data, eol_style)
mode = os.lstat(self.fullpath).st_mode
- self.directory[self.path] = Content.from_bytes(mode=mode,
- data=data)
+ self.directory[self.path] = from_disk.Content.from_bytes(
+ mode=mode, data=data)
else:
- self.directory[self.path] = Content.from_file(path=self.fullpath)
+ self.directory[self.path] = from_disk.Content.from_file(
+ path=self.fullpath)
class BaseDirEditor:
@@ -318,7 +322,7 @@
"""
path = os.fsencode(args[0])
- self.directory[path] = Content()
+ self.directory[path] = from_disk.Content()
return FileEditor(self.directory, rootpath=self.rootpath, path=path)
def add_file(self, path, copyfrom_path=None, copyfrom_rev=-1):
@@ -326,7 +330,7 @@
"""
path = os.fsencode(path)
- self.directory[path] = Content()
+ self.directory[path] = from_disk.Content()
return FileEditor(self.directory, self.rootpath, path)
def change_prop(self, key, value):
@@ -378,7 +382,7 @@
"""
path = os.fsencode(path)
os.makedirs(os.path.join(self.rootpath, path), exist_ok=True)
- self.directory[path] = Directory()
+ self.directory[path] = from_disk.Directory()
return self
@@ -414,7 +418,7 @@
self.conn = conn
self.rootpath = rootpath
if directory is None:
- directory = Directory()
+ directory = from_disk.Directory()
self.directory = directory
self.editor = Editor(rootpath=rootpath, directory=directory)
@@ -433,7 +437,8 @@
codecs.register_error("strict", codecs.strict_errors)
return self.editor.directory
- def compute_hashes(self, rev):
+ def compute_hashes(self, rev: int) -> Tuple[
+ List[Content], List[SkippedContent], List[Directory]]:
"""Compute hashes at revisions rev.
Expects the state to be at previous revision's objects.
@@ -446,7 +451,23 @@
"""
self.replay(rev)
- return self.directory.collect()
+ contents: List[Content] = []
+ skipped_contents: List[SkippedContent] = []
+ directories: List[Directory] = []
+ for obj in self.directory.iter_tree():
+ obj = obj.to_model()
+ if isinstance(obj, Content):
+ obj = obj.with_data()
+ contents.append(obj)
+ elif isinstance(obj, SkippedContent):
+ skipped_contents.append(obj)
+ elif isinstance(obj, Directory):
+ directories.append(obj)
+ else:
+ raise TypeError(
+ f'Unexpected content type from disk: {obj}')
+
+ return contents, skipped_contents, directories
@click.command()
@@ -487,12 +508,13 @@
replay = Replay(conn, rootpath)
for rev in range(revision_start, revision_end+1):
- objects = replay.compute_hashes(rev)
+ contents, skipped_contents, directories = replay.compute_hashes(
+ rev)
print("r%s %s (%s new contents, %s new directories)" % (
rev,
hashutil.hash_to_hex(replay.directory.hash),
- len(objects.get('content', {})),
- len(objects.get('directory', {})),
+ len(contents) + len(skipped_contents),
+ len(directories),
))
if debug:
diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -221,14 +221,14 @@
"""
for commit in self.logs(start_revision, end_revision):
rev = commit['rev']
- objects = self.swhreplay.compute_hashes(rev)
+ gens = self.swhreplay.compute_hashes(rev)
if rev == end_revision:
nextrev = None
else:
nextrev = rev + 1
- yield rev, nextrev, commit, objects, self.swhreplay.directory
+ yield rev, nextrev, commit, gens, self.swhreplay.directory
def swh_hash_data_at_revision(self, revision):
"""Compute the hash data at revision.
diff --git a/swh/loader/svn/tests/test_converters.py b/swh/loader/svn/tests/test_converters.py
--- a/swh/loader/svn/tests/test_converters.py
+++ b/swh/loader/svn/tests/test_converters.py
@@ -3,148 +3,135 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import unittest
-
+from swh.model.hashutil import hash_to_bytes
+from swh.model.model import (
+ Person, Revision, Timestamp, TimestampWithTimezone
+)
from swh.loader.svn import converters
-class TestAuthorConverters(unittest.TestCase):
- def test_svn_author_to_swh_person(self):
- """The author should have name, email and fullname filled.
-
- """
- actual_person = converters.svn_author_to_swh_person(
- 'tony <ynot@dagobah>')
- self.assertEqual(actual_person, {
- 'fullname': b'tony <ynot@dagobah>',
- 'name': b'tony',
- 'email': b'ynot@dagobah',
- })
-
- def test_svn_author_to_swh_person_no_email(self):
- """The author and fullname should be the same as the input (author).
-
- """
- actual_person = converters.svn_author_to_swh_person('tony')
- self.assertEqual(actual_person, {
- 'fullname': b'tony',
- 'name': b'tony',
- 'email': None,
- })
-
- def test_svn_author_to_swh_person_empty_person(self):
- """Empty person has only its fullname filled with the empty
- byte-string.
-
- """
- actual_person = converters.svn_author_to_swh_person('')
- self.assertEqual(actual_person, {
- 'fullname': b'',
- 'name': None,
- 'email': None,
- })
-
-
-class TestRevisionConverters(unittest.TestCase):
- def test_build_swh_revision_default(self):
- """This should build the swh revision with the swh revision's extra
- headers about the repository.
-
- """
- actual_swh_revision = converters.build_swh_revision(
- repo_uuid=b'uuid',
- dir_id='dir-id',
- commit={
- 'author_name': {
- 'name': b'theo',
- 'email': b'theo@uuid',
- 'fullname': b'theo <theo@uuid>'
- },
- 'message': b'commit message',
- 'author_date': {
- 'timestamp': {
- 'seconds': 1088108379,
- 'microseconds': 0,
- },
- 'offset': 0
- }
- },
- rev=10,
- parents=['123'])
-
- date = {
- 'timestamp': {
- 'seconds': 1088108379,
- 'microseconds': 0,
- },
- 'offset': 0,
- }
-
- self.assertEqual(actual_swh_revision, {
- 'date': date,
- 'committer_date': date,
- 'type': 'svn',
- 'directory': 'dir-id',
+def test_svn_author_to_swh_person():
+ """The author should have name, email and fullname filled.
+
+ """
+ actual_person = converters.svn_author_to_swh_person(
+ 'tony <ynot@dagobah>')
+
+ assert actual_person == Person.from_dict({
+ 'fullname': b'tony <ynot@dagobah>',
+ 'name': b'tony',
+ 'email': b'ynot@dagobah',
+ })
+
+
+def test_svn_author_to_swh_person_no_email():
+ """The author and fullname should be the same as the input (author).
+
+ """
+ actual_person = converters.svn_author_to_swh_person('tony')
+ assert actual_person == Person.from_dict({
+ 'fullname': b'tony',
+ 'name': b'tony',
+ 'email': None,
+ })
+
+
+def test_svn_author_to_swh_person_empty_person():
+ """Empty person has only its fullname filled with the empty
+ byte-string.
+
+ """
+ actual_person = converters.svn_author_to_swh_person('')
+ assert actual_person == Person.from_dict({
+ 'fullname': b'',
+ 'name': None,
+ 'email': None,
+ })
+
+
+def test_build_swh_revision_default():
+ """This should build the swh revision with the swh revision's extra
+ headers about the repository.
+
+ """
+ dir_id = hash_to_bytes('d6e08e19159f77983242877c373c75222d5ae9dd')
+ date = TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1088108379, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ )
+ actual_rev = converters.build_swh_revision(
+ repo_uuid=b'uuid',
+ dir_id=dir_id,
+ commit={
+ 'author_name': Person(
+ name=b'theo',
+ email=b'theo@uuid',
+ fullname=b'theo <theo@uuid>'
+ ),
'message': b'commit message',
- 'author': {
- 'name': b'theo',
- 'email': b'theo@uuid',
- 'fullname': b'theo <theo@uuid>'
- },
- 'committer': {
- 'name': b'theo',
- 'email': b'theo@uuid',
- 'fullname': b'theo <theo@uuid>'
- },
- 'synthetic': True,
- 'metadata': {
- 'extra_headers': [
- ['svn_repo_uuid', b'uuid'],
- ['svn_revision', b'10'],
- ]
- },
- 'parents': ['123'],
- })
-
-
-class ConvertDate(unittest.TestCase):
- def test_svn_date_to_swh_date(self):
- """The timestamp should not be tampered with and include the
- decimals.
-
- """
- self.assertEqual(
- converters.svn_date_to_swh_date('2011-05-31T06:04:39.500900Z'), {
- 'timestamp': {
- 'seconds': 1306821879,
- 'microseconds': 500900,
- },
- 'offset': 0
- })
-
- self.assertEqual(
- converters.svn_date_to_swh_date('2011-05-31T06:04:39.800722Z'),
- {
- 'timestamp': {
- 'seconds': 1306821879,
- 'microseconds': 800722,
- },
- 'offset': 0
- })
-
- def test_svn_date_to_swh_date_epoch(self):
- """Empty date should be EPOCH (timestamp and offset at 0)."""
- # It should return 0, epoch
- self.assertEqual({
- 'timestamp': {
- 'seconds': 0,
- 'microseconds': 0,
- },
- 'offset': 0,
- }, converters.svn_date_to_swh_date(''))
- self.assertEqual({
- 'timestamp': {
- 'seconds': 0,
- 'microseconds': 0,
- }, 'offset': 0,
- }, converters.svn_date_to_swh_date(None))
+ 'author_date': date,
+ },
+ rev=10,
+ parents=[])
+
+ expected_rev = Revision.from_dict({
+ 'date': date.to_dict(),
+ 'committer_date': date.to_dict(),
+ 'type': 'svn',
+ 'directory': dir_id,
+ 'message': b'commit message',
+ 'author': {
+ 'name': b'theo',
+ 'email': b'theo@uuid',
+ 'fullname': b'theo <theo@uuid>'
+ },
+ 'committer': {
+ 'name': b'theo',
+ 'email': b'theo@uuid',
+ 'fullname': b'theo <theo@uuid>'
+ },
+ 'synthetic': True,
+ 'metadata': {
+ 'extra_headers': [
+ ['svn_repo_uuid', b'uuid'],
+ ['svn_revision', b'10'],
+ ]
+ },
+ 'parents': [],
+ })
+
+ assert actual_rev == expected_rev
+
+
+def test_svn_date_to_swh_date():
+ """The timestamp should not be tampered with and include the
+ decimals.
+
+ """
+ assert converters.svn_date_to_swh_date('2011-05-31T06:04:39.500900Z') == \
+ TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1306821879, microseconds=500900),
+ offset=0,
+ negative_utc=False,
+ )
+
+ assert converters.svn_date_to_swh_date('2011-05-31T06:04:39.800722Z') == \
+ TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1306821879, microseconds=800722),
+ offset=0,
+ negative_utc=False,
+ )
+
+
+def test_svn_date_to_swh_date_epoch():
+ """Empty date should be EPOCH (timestamp and offset at 0)."""
+ # It should return 0, epoch
+ default_tstz = TimestampWithTimezone(
+ timestamp=Timestamp(seconds=0, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ )
+
+ assert converters.svn_date_to_swh_date('') == default_tstz
+ assert converters.svn_date_to_swh_date(None) == default_tstz
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -10,18 +10,27 @@
from swh.loader.svn.loader import (DEFAULT_BRANCH, SvnLoader,
SvnLoaderFromRemoteDump, build_swh_snapshot)
from swh.model import hashutil
+from swh.model.model import (
+ Origin, Snapshot
+)
def test_build_swh_snapshot():
- assert build_swh_snapshot('revision-id') == {
- 'id': None,
+ rev_id = hashutil.hash_to_bytes(
+ '3f51abf3b3d466571be0855dfa67e094f9ceff1b')
+ snap = build_swh_snapshot(rev_id)
+
+ assert isinstance(snap, Snapshot)
+
+ expected_snapshot = Snapshot.from_dict({
'branches': {
DEFAULT_BRANCH: {
- 'target': 'revision-id',
+ 'target': rev_id,
'target_type': 'revision',
}
}
- }
+ })
+ assert snap == expected_snapshot
_LOADER_TEST_CONFIG = {
@@ -35,9 +44,6 @@
'storage': {
'cls': 'pipeline',
'steps': [
- {
- 'cls': 'validate'
- },
{
'cls': 'retry',
},
@@ -106,14 +112,7 @@
super().__init__(url, destination_path=destination_path,
start_from_scratch=start_from_scratch,
swh_revision=swh_revision)
- self.origin = {
- 'id': 1,
- 'url': url,
- }
- self.visit = {
- 'origin': 1,
- 'visit': 1,
- }
+ self.origin = Origin(url=url)
self.last_snp_rev = last_snp_rev
def parse_config_file(self, *args, **kwargs):
@@ -207,10 +206,10 @@
_LAST_SNP_REV = {
- 'snapshot': {
+ 'snapshot': Snapshot.from_dict({
'id': GOURMET_FLAG_SNAPSHOT,
'branches': {}
- },
+ }),
'revision': {
'id': hashutil.hash_to_bytes(
'4876cb10aec6f708f7466dddf547567b65f6c39c'),
diff --git a/swh/loader/svn/tests/test_utils.py b/swh/loader/svn/tests/test_utils.py
--- a/swh/loader/svn/tests/test_utils.py
+++ b/swh/loader/svn/tests/test_utils.py
@@ -1,46 +1,46 @@
-# Copyright (C) 2016 The Software Heritage developers
+# Copyright (C) 2016-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import pty
-import unittest
from subprocess import Popen
+from swh.model.model import Timestamp
from swh.loader.svn import utils
-class TestUtils(unittest.TestCase):
- def test_strdate_to_timestamp(self):
- """Formatted string date should be converted in timestamp."""
- actual_ts = utils.strdate_to_timestamp('2011-05-31T06:04:39.800722Z')
- self.assertEqual(actual_ts, {'seconds': 1306821879,
- 'microseconds': 800722})
-
- actual_ts = utils.strdate_to_timestamp('2011-05-31T06:03:39.123450Z')
- self.assertEqual(actual_ts, {'seconds': 1306821819,
- 'microseconds': 123450})
-
- def test_strdate_to_timestamp_empty_does_not_break(self):
- """Empty or None date should be timestamp 0."""
- self.assertEqual({'seconds': 0, 'microseconds': 0},
- utils.strdate_to_timestamp(''))
- self.assertEqual({'seconds': 0, 'microseconds': 0},
- utils.strdate_to_timestamp(None))
-
- def test_outputstream(self):
- stdout_r, stdout_w = pty.openpty()
- echo = Popen(['echo', '-e', 'foo\nbar\nbaz'], stdout=stdout_w)
- os.close(stdout_w)
- stdout_stream = utils.OutputStream(stdout_r)
- lines = []
- while True:
- current_lines, readable = stdout_stream.read_lines()
- lines += current_lines
- if not readable:
- break
- echo.wait()
- os.close(stdout_r)
- self.assertEqual(lines, ['foo', 'bar', 'baz'])
+def test_outputstream():
+ stdout_r, stdout_w = pty.openpty()
+ echo = Popen(['echo', '-e', 'foo\nbar\nbaz'], stdout=stdout_w)
+ os.close(stdout_w)
+ stdout_stream = utils.OutputStream(stdout_r)
+ lines = []
+ while True:
+ current_lines, readable = stdout_stream.read_lines()
+ lines += current_lines
+ if not readable:
+ break
+ echo.wait()
+ os.close(stdout_r)
+ assert lines == ['foo', 'bar', 'baz']
+
+
+def test_strdate_to_timestamp():
+ """Formatted string date should be converted in timestamp."""
+ actual_ts = utils.strdate_to_timestamp('2011-05-31T06:04:39.800722Z')
+ assert actual_ts == Timestamp(seconds=1306821879,
+ microseconds=800722)
+
+ actual_ts = utils.strdate_to_timestamp('2011-05-31T06:03:39.123450Z')
+ assert actual_ts == Timestamp(seconds=1306821819,
+ microseconds=123450)
+
+
+def test_strdate_to_timestamp_empty_does_not_break():
+ """Empty or None date should be timestamp 0."""
+ default_ts = Timestamp(seconds=0, microseconds=0)
+ assert default_ts == utils.strdate_to_timestamp('')
+ assert default_ts == utils.strdate_to_timestamp(None)
diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py
--- a/swh/loader/svn/utils.py
+++ b/swh/loader/svn/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016 The Software Heritage developers
+# Copyright (C) 2016-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,8 +11,10 @@
from dateutil import parser
from subprocess import PIPE, Popen, call
+from swh.model.model import Optional, Timestamp
-def strdate_to_timestamp(strdate):
+
+def strdate_to_timestamp(strdate: Optional[str]) -> Timestamp:
"""Convert a string date to an int timestamp.
Args:
@@ -31,7 +33,7 @@
}
else: # epoch
ts = {'seconds': 0, 'microseconds': 0}
- return ts
+ return Timestamp.from_dict(ts)
class OutputStream:

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 1:14 AM (3 h, 56 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219078

Event Timeline