Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/tests/test_loader.py
# Copyright (C) 2018 The Software Heritage developers | # Copyright (C) 2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os.path | import os.path | ||||
import zipfile | import zipfile | ||||
import tempfile | import tempfile | ||||
import subprocess | import subprocess | ||||
from swh.loader.git.loader import GitLoader, GitLoaderFromArchive | from swh.loader.git.loader import GitLoader, GitLoaderFromArchive | ||||
from swh.loader.core.tests import BaseLoaderTest, LoaderNoStorage | from swh.loader.core.tests import BaseLoaderTest, LoaderNoStorage | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes, hash_to_bytehex | ||||
class MockStorage0:
    """The storage's state before anything is added."""

    def snapshot_get_latest(self, origin_id):
        """Report that no snapshot exists, whatever the origin."""
        return None

    def content_missing(self, contents, key_hash='sha1'):
        """Report every given content as missing.

        Each content is identified by the value stored under ``key_hash``.
        """
        missing = []
        for content in contents:
            missing.append(content[key_hash])
        return missing

    def directory_missing(self, directories):
        """Report every given directory as missing (input handed back as is)."""
        return directories

    def revision_missing(self, revisions):
        """Report every given revision as missing (input handed back as is)."""
        return revisions

    def object_find_by_sha1_git(self, ids):
        """Find nothing: the empty storage knows no object."""
        return {}
# Hex digests of the contents found in the test repository.
CONTENT1 = {
    '33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d',  # README v1
    '349c4ff7d21f1ec0eda26f3d9284c293e3425417',  # README v2
    '799c11e348d39f1704022b8354502e2f81f3c037',  # file1.txt
    '4bdb40dfd6ec75cb730e678b5d7786e30170c5fb',  # file2.txt
}
▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines | def directory_missing(self, directories): | ||||
return (set(directories) - | return (set(directories) - | ||||
set(map(hash_to_bytes, REVISIONS1)) - | set(map(hash_to_bytes, REVISIONS1)) - | ||||
{hash_to_bytes(SUBDIR_HASH)}) | {hash_to_bytes(SUBDIR_HASH)}) | ||||
def revision_missing(self, revisions):
    """Return the given revisions that are not part of REVISIONS1.

    Revision ids must be bytes; anything else trips the assertion.
    """
    assert not any(not isinstance(rev, bytes) for rev in revisions)
    requested = set(revisions)
    known = set(map(hash_to_bytes, REVISIONS1))
    return list(requested.difference(known))
def object_find_by_sha1_git(self, ids):
    """Look up the given ids among the objects of the first load.

    Per the review discussion on this change, this is meant to behave
    like ``Storage.object_find_by_sha1_git``, only operating on the mock
    data (REVISIONS1, SUBDIR_HASH and CONTENT1), so that a second load
    sees these objects as already archived.
    NOTE(review): presumably this is what makes the loader take its
    "already known" path in test_load_unchanged -- confirm.

    Args:
        ids: iterable of object identifiers, as bytes.

    Returns:
        dict mapping every requested id to a list holding one
        description dict (``sha1_git``, ``type``, ``id``, ``object_id``)
        when the id is known, or to an empty list otherwise.
    """
    res = {}
    for id_ in ids:
        # hash_to_bytehex yields the hex digest as *bytes* (per
        # swh.model.hashutil), while the fixture constants hold *str*
        # digests; without decoding, none of the lookups below can match.
        hex_id = hash_to_bytehex(id_)
        if isinstance(hex_id, bytes):
            hex_id = hex_id.decode('ascii')

        if hex_id in REVISIONS1:
            type_ = 'revision'
        elif hex_id in REVISIONS1.values() or hex_id == SUBDIR_HASH:
            # REVISIONS1 values are matched as directory ids, and
            # SUBDIR_HASH is one more directory id.
            type_ = 'directory'
        elif hex_id in CONTENT1:
            type_ = 'content'
        else:
            type_ = None

        found = []
        if type_ is not None:
            found.append({
                'sha1_git': id_,
                'type': type_,
                'id': id_,
                'object_id': 42,  # arbitrary placeholder value
            })
        res[id_] = found
    return res
class LoaderNoStorageMixin(LoaderNoStorage): | class LoaderNoStorageMixin(LoaderNoStorage): | ||||
def __init__(self): | def __init__(self): | ||||
super().__init__() | super().__init__() | ||||
self.origin_id = 1 | self.origin_id = 1 | ||||
self.visit = 1 | self.visit = 1 | ||||
self.storage = MockStorage0() | self.storage = MockStorage0() | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | class BaseZipGitLoaderTest(BaseGitLoaderTest): | ||||
def load(self):
    """Run the loader on the archive at ``self.destination_path``.

    The origin URL comes from ``self.repo_url`` and the visit date is
    fixed; whatever the loader returns is handed back unchanged.
    """
    run = self.loader.load
    load_kwargs = {
        'origin_url': self.repo_url,
        'visit_date': '2016-05-03 15:16:32+00',
        'archive_path': self.destination_path,
    }
    return run(**load_kwargs)
class GitLoaderTests:
    """Common tests for all git loaders."""

    def test_load(self):
        """Load the simple repository prepared by `setUp()` and check
        that all of its objects ended up in the storage."""
        outcome = self.load()
        self.assertEqual(outcome['status'], 'eventful', outcome)

        # Object counts.
        self.assertCountContents(4)  # two README, file1, file2
        self.assertCountDirectories(7)
        self.assertCountReleases(0)  # FIXME: why not 2?
        self.assertCountRevisions(7)
        self.assertCountSnapshots(1)

        # Object identities.
        self.assertRevisionsOk(REVISIONS1)
        self.assertSnapshotOk(SNAPSHOT1)

        # Final statuses reported by the loader.
        load_status = self.loader.load_status()
        self.assertEqual(load_status, {'status': 'eventful'})
        visit_status = self.loader.visit_status()
        self.assertEqual(visit_status, 'full')

    def test_load_unchanged(self):
        """Load the same repository twice and check the second load
        adds nothing."""
        first = self.load()
        self.assertEqual(first['status'], 'eventful')

        # Swap in the storage state that already knows the first load.
        self.loader.storage = MockStorage1()

        second = self.load()
        self.assertEqual(second['status'], 'uneventful')
        self.assertCountSnapshots(1)
class DirGitLoaderTest(BaseDirGitLoaderTest, GitLoaderTests): | class DirGitLoaderTest(BaseDirGitLoaderTest, GitLoaderTests): | ||||
"""Tests for the GitLoader. Includes the common ones, and | |||||
add others that only work with a local dir.""" | |||||
def _git(self, *cmd):
    """Run ``git`` with the given arguments inside the test repository.

    Returns the command's standard output. On a non-zero exit status the
    captured output and stderr attributes of the error are printed and
    the ``CalledProcessError`` is re-raised.
    """
    argv = ['git', '-C', self.destination_path, *cmd]
    try:
        return subprocess.check_output(argv)
    except subprocess.CalledProcessError as exc:
        print(exc.output)
        print(exc.stderr)
        raise
def test_load_changed(self): | def test_load_changed(self): | ||||
"""Loads a repository, makes some changes by adding files, commits, | |||||
and merges, load it again, and check the storage contains everything | |||||
it should.""" | |||||
# Initial load | # Initial load | ||||
res = self.load() | res = self.load() | ||||
self.assertEqual(res['status'], 'eventful', res) | self.assertEqual(res['status'], 'eventful', res) | ||||
self._git('config', '--local', 'user.email', 'you@example.com') | self._git('config', '--local', 'user.email', 'you@example.com') | ||||
self._git('config', '--local', 'user.name', 'Your Name') | self._git('config', '--local', 'user.name', 'Your Name') | ||||
# Load with a new file + revision | # Load with a new file + revision | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | def test_load_changed(self): | ||||
# TODO: how to check the snapshot id? | # TODO: how to check the snapshot id? | ||||
# self.assertSnapshotOk(SNAPSHOT1) | # self.assertSnapshotOk(SNAPSHOT1) | ||||
self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) | self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) | ||||
self.assertEqual(self.loader.visit_status(), 'full') | self.assertEqual(self.loader.visit_status(), 'full') | ||||
class ZipGitLoaderTest(BaseZipGitLoaderTest, GitLoaderTests):
    """Tests for GitLoaderFromArchive.

    Adds nothing of its own: every test comes from GitLoaderTests.
    """
It'd be good to explain this.
Those values are returned to simulate an archive cache hit.
Thus the git updater's update behavior kicks in.
(I think.)