diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 0000000..5573ff9
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,2 @@
+# python: Reformat code with black 22.3.0
+22edda2475c39a2de84466d479ed38f245955915
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 05398bb..1c95e3d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,42 +1,40 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
- id: trailing-whitespace
- id: check-json
- id: check-yaml
- repo: https://gitlab.com/pycqa/flake8
rev: 4.0.1
hooks:
- id: flake8
+ additional_dependencies: [flake8-bugbear==22.3.23]
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
name: Check source code spelling
stages: [commit]
- - id: codespell
- name: Check commit message spelling
- stages: [commit-msg]
- repo: local
hooks:
- id: mypy
name: mypy
entry: mypy
args: [swh]
pass_filenames: false
language: system
types: [python]
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
- id: isort
- repo: https://github.com/python/black
- rev: 19.10b0
+ rev: 22.3.0
hooks:
- id: black
diff --git a/PKG-INFO b/PKG-INFO
index 410b963..de259a8 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,32 +1,32 @@
Metadata-Version: 2.1
Name: swh.loader.bzr
-Version: 1.2.0
+Version: 1.3.0
Summary: Software Heritage Bazaar/Breezy intent
Home-page: https://forge.softwareheritage.org/diffusion/DLDBZR/
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-bzr
Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-bzr/
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 3 - Alpha
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
Provides-Extra: testing
License-File: LICENSE
License-File: AUTHORS
Software Heritage - Bazaar/Breezy loader
========================================
Loader for `Bazaar <https://bazaar.canonical.com/en/>`_ and `Breezy
<https://www.breezy-vcs.org/>`_ repositories. Breezy is a friendly fork of Bazaar that
supports the Bazaar file format and network protocol.
diff --git a/pytest.ini b/pytest.ini
index b712d00..e7f139e 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,2 +1,2 @@
[pytest]
-norecursedirs = docs .*
+norecursedirs = build docs .*
diff --git a/requirements-swh.txt b/requirements-swh.txt
index b8df361..2087e44 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
swh.model >= 2.6.1
swh.storage >= 0.41.1
swh.scheduler >= 0.23.0
-swh.loader.core >= 2.3.0
+swh.loader.core >= 3.0.0
diff --git a/requirements-test.txt b/requirements-test.txt
index b1fbdd3..8642834 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,5 +1,5 @@
-pytest < 7.0.0 # v7.0.0 removed _pytest.tmpdir.TempdirFactory, which is used by some of the pytest plugins we use
+pytest
pytest-mock
swh.core[http] >= 0.0.61
swh.scheduler[testing] >= 0.5.0
swh.storage[testing]
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index 1d722c2..f65ba0a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,8 +1,9 @@
[flake8]
-ignore = E203,E231,W503
+select = C,E,F,W,B950
+ignore = E203,E231,E501,W503
max-line-length = 88
[egg_info]
tag_build =
tag_date = 0
diff --git a/setup.py b/setup.py
index 4b61328..22a9bf5 100755
--- a/setup.py
+++ b/setup.py
@@ -1,77 +1,77 @@
#!/usr/bin/env python3
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from io import open
from os import path
from setuptools import find_packages, setup
# Absolute path of the directory that contains this setup script.
here = path.abspath(path.dirname(__file__))

# The README doubles as the long description shown on the package index.
readme_path = path.join(here, "README.rst")
with open(readme_path, encoding="utf-8") as f:
    long_description = f.read()
def parse_requirements(*names):
    """Collect requirement specifiers from the ``requirements*.txt`` files.

    Args:
        *names: suffixes selecting the files to read. A falsy name (``None``
            or ``""``) selects ``requirements.txt``; any other name selects
            ``requirements-<name>.txt``. Paths are resolved relative to the
            current working directory.

    Returns:
        The stripped, non-empty, non-comment lines of every file that exists,
        in the order the names were given.
    """
    requirements = []
    for name in names:
        reqf = "requirements-%s.txt" % name if name else "requirements.txt"
        if not path.exists(reqf):
            # A missing file must not prevent the files listed after it from
            # being read, so skip it instead of returning early.
            continue
        with open(reqf) as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    requirements.append(line)
    return requirements
# Edit this part to match your module, replace foo by its name
# Full sample:
# https://forge.softwareheritage.org/diffusion/DCORE/browse/master/setup.py
setup(
name="swh.loader.bzr", # example: swh.loader.pypi
description="Software Heritage Bazaar/Breezy intent",
long_description=long_description,
long_description_content_type="text/x-rst",
python_requires=">=3.7",
author="Software Heritage developers",
author_email="swh-devel@inria.fr",
url="https://forge.softwareheritage.org/diffusion/DLDBZR/",
packages=find_packages(), # packages's modules
install_requires=parse_requirements(None, "swh"),
tests_require=parse_requirements("test"),
setup_requires=["setuptools-scm"],
use_scm_version=True,
extras_require={"testing": parse_requirements("test")},
include_package_data=True,
entry_points="""
[swh.workers]
loader.bzr=swh.loader.bzr:register
[console_scripts]
swh-bzr-identify=swh.loader.bzr.identify:main
""",
classifiers=[
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 3 - Alpha",
],
project_urls={
"Bug Reports": "https://forge.softwareheritage.org/maniphest",
"Funding": "https://www.softwareheritage.org/donate",
"Source": ("https://forge.softwareheritage.org/source/swh-loader-bzr"),
- "Documentation": "https://docs.softwareheritage.org/devel/swh-loader-bzr/", # NoQA: E501
+ "Documentation": "https://docs.softwareheritage.org/devel/swh-loader-bzr/", # NoQA: B950
},
)
diff --git a/swh.loader.bzr.egg-info/PKG-INFO b/swh.loader.bzr.egg-info/PKG-INFO
index 410b963..de259a8 100644
--- a/swh.loader.bzr.egg-info/PKG-INFO
+++ b/swh.loader.bzr.egg-info/PKG-INFO
@@ -1,32 +1,32 @@
Metadata-Version: 2.1
Name: swh.loader.bzr
-Version: 1.2.0
+Version: 1.3.0
Summary: Software Heritage Bazaar/Breezy intent
Home-page: https://forge.softwareheritage.org/diffusion/DLDBZR/
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-bzr
Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-bzr/
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 3 - Alpha
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
Provides-Extra: testing
License-File: LICENSE
License-File: AUTHORS
Software Heritage - Bazaar/Breezy loader
========================================
Loader for `Bazaar <https://bazaar.canonical.com/en/>`_ and `Breezy
<https://www.breezy-vcs.org/>`_ repositories. Breezy is a friendly fork of Bazaar that
supports the Bazaar file format and network protocol.
diff --git a/swh.loader.bzr.egg-info/SOURCES.txt b/swh.loader.bzr.egg-info/SOURCES.txt
index 67ae61a..764f7b5 100644
--- a/swh.loader.bzr.egg-info/SOURCES.txt
+++ b/swh.loader.bzr.egg-info/SOURCES.txt
@@ -1,62 +1,63 @@
+.git-blame-ignore-revs
.gitignore
.pre-commit-config.yaml
AUTHORS
CODE_OF_CONDUCT.md
CONTRIBUTORS
LICENSE
MANIFEST.in
Makefile
README.rst
conftest.py
mypy.ini
pyproject.toml
pytest.ini
requirements-swh.txt
requirements-test.txt
requirements.txt
setup.cfg
setup.py
tox.ini
docs/.gitignore
docs/Makefile
docs/README.rst
docs/conf.py
docs/how-bzr-works.rst
docs/index.rst
docs/_static/.placeholder
docs/_templates/.placeholder
swh/__init__.py
swh.loader.bzr.egg-info/PKG-INFO
swh.loader.bzr.egg-info/SOURCES.txt
swh.loader.bzr.egg-info/dependency_links.txt
swh.loader.bzr.egg-info/entry_points.txt
swh.loader.bzr.egg-info/requires.txt
swh.loader.bzr.egg-info/top_level.txt
swh/loader/__init__.py
swh/loader/bzr/__init__.py
swh/loader/bzr/loader.py
swh/loader/bzr/py.typed
swh/loader/bzr/tasks.py
swh/loader/bzr/tests/__init__.py
swh/loader/bzr/tests/conftest.py
swh/loader/bzr/tests/py.typed
swh/loader/bzr/tests/test_loader.py
swh/loader/bzr/tests/test_tasks.py
swh/loader/bzr/tests/data/broken-tags.sh
swh/loader/bzr/tests/data/broken-tags.tgz
swh/loader/bzr/tests/data/does-not-support-tags.sh
swh/loader/bzr/tests/data/does-not-support-tags.tgz
swh/loader/bzr/tests/data/empty.sh
swh/loader/bzr/tests/data/empty.tgz
swh/loader/bzr/tests/data/ghosts.py
swh/loader/bzr/tests/data/ghosts.tgz
swh/loader/bzr/tests/data/metadata-and-type-changes.sh
swh/loader/bzr/tests/data/metadata-and-type-changes.tgz
swh/loader/bzr/tests/data/needs-upgrade.sh
swh/loader/bzr/tests/data/needs-upgrade.tgz
swh/loader/bzr/tests/data/no-branch.sh
swh/loader/bzr/tests/data/no-branch.tgz
swh/loader/bzr/tests/data/nominal.sh
swh/loader/bzr/tests/data/nominal.tgz
swh/loader/bzr/tests/data/renames.sh
swh/loader/bzr/tests/data/renames.tgz
\ No newline at end of file
diff --git a/swh.loader.bzr.egg-info/requires.txt b/swh.loader.bzr.egg-info/requires.txt
index 44ec749..7ad779a 100644
--- a/swh.loader.bzr.egg-info/requires.txt
+++ b/swh.loader.bzr.egg-info/requires.txt
@@ -1,12 +1,12 @@
breezy
swh.model>=2.6.1
swh.storage>=0.41.1
swh.scheduler>=0.23.0
-swh.loader.core>=2.3.0
+swh.loader.core>=3.0.0
[testing]
-pytest<7.0.0
+pytest
pytest-mock
swh.core[http]>=0.0.61
swh.scheduler[testing]>=0.5.0
swh.storage[testing]
diff --git a/swh/loader/bzr/loader.py b/swh/loader/bzr/loader.py
index bb06189..032529f 100644
--- a/swh/loader/bzr/loader.py
+++ b/swh/loader/bzr/loader.py
@@ -1,705 +1,705 @@
# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""This document contains a SWH loader for ingesting repository data
from Bazaar or Breezy.
"""
from datetime import datetime
from functools import lru_cache, partial
import itertools
import os
from tempfile import mkdtemp
-from typing import Dict, Iterator, List, NewType, Optional, Set, Tuple, TypeVar, Union
+from typing import (
+ Any,
+ Dict,
+ Iterator,
+ List,
+ NewType,
+ Optional,
+ Set,
+ Tuple,
+ TypeVar,
+ Union,
+)
from breezy import errors as bzr_errors
from breezy import repository, tsort
from breezy.builtins import cmd_branch, cmd_upgrade
from breezy.bzr import bzrdir
from breezy.bzr.branch import Branch as BzrBranch
from breezy.bzr.inventory import Inventory, InventoryEntry
from breezy.bzr.inventorytree import InventoryTreeChange
from breezy.revision import NULL_REVISION
from breezy.revision import Revision as BzrRevision
from breezy.tree import Tree
from swh.loader.core.loader import BaseLoader
from swh.loader.core.utils import clean_dangling_folders, clone_with_timeout
from swh.model import from_disk, swhids
from swh.model.model import (
Content,
ExtID,
ObjectType,
- Origin,
Person,
Release,
Revision,
RevisionType,
Sha1Git,
Snapshot,
SnapshotBranch,
TargetType,
Timestamp,
TimestampWithTimezone,
)
from swh.storage.algos.snapshot import snapshot_get_latest
from swh.storage.interface import StorageInterface
# Prefix given to the temporary clone directories created by the loader; it is
# also the pattern used to recognize dangling ones during pre-cleanup.
TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.bzr.from_disk"
# Type and version recorded on every ExtID that maps a Bazaar revision id to
# the corresponding SWH revision.
EXTID_TYPE = "bzr-nodeid"
EXTID_VERSION: int = 1
# Bazaar revision identifiers are bytes; the NewType keeps them distinct from
# arbitrary byte strings for the type checker.
BzrRevisionId = NewType("BzrRevisionId", bytes)
# Generic type of the `default` argument accepted by `BzrDirectory.get`.
T = TypeVar("T")
# These are all the old Bazaar repository formats that we might encounter
# in the wild. Bazaar's `clone` does not result in an upgrade, it needs to be
# explicit.
# Each entry is the raw format marker (bytes) as compared against
# `repo._format.as_string()` when deciding whether to run an upgrade.
older_repository_formats = {
b"Bazaar Knit Repository Format 3 (bzr 0.15)\n",
b"Bazaar Knit Repository Format 4 (bzr 1.0)\n",
b"Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n",
b"Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n",
b"Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n",
b"Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n",
b"Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n",
b"Bazaar development format 2 with subtree support \
(needs bzr.dev from before 1.8)\n",
b"Bazaar development format 8\n",
b"Bazaar pack repository format 1 (needs bzr 0.92)\n",
b"Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n",
b"Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n",
b"Bazaar-NG Knit Repository Format 1",
}
# Latest one as of this time, unlikely to change
# Repositories already in this format are loaded without a format upgrade.
expected_repository_format = b"Bazaar repository format 2a (needs bzr 1.16 or later)\n"
class UnknownRepositoryFormat(Exception):
    """Raised when the repository being loaded uses a format we do not know.

    A new format may (however unlikely) have been released, so this should be
    checked before writing the repository off as broken or unsupported.
    """
class BzrDirectory(from_disk.Directory):
    """A directory that is more convenient to mutate path by path.

    - assigning to a nested path creates the missing parent directories
    - deleting a nested path removes the parent directories it leaves empty
    """

    def __setitem__(
        self, path: bytes, value: Union[from_disk.Content, "BzrDirectory"]
    ) -> None:
        """Store ``value`` at ``path``, creating intermediate directories."""
        if b"/" not in path:
            super().__setitem__(path, value)
            return

        head, tail = path.split(b"/", 1)
        child = self.get(head)
        if child is None or isinstance(child, from_disk.Content):
            # Nothing there yet, or a file in the way: put a directory there.
            child = BzrDirectory()
            self[head] = child
        child[tail] = value

    def __delitem__(self, path: bytes) -> None:
        """Delete ``path``, then prune the parents that became empty."""
        super().__delitem__(path)
        while b"/" in path:
            # Walk up one level at a time, stopping at the first non-empty
            # parent.
            path = path.rsplit(b"/", 1)[0]
            if len(self[path]) != 0:
                break
            super().__delitem__(path)

    def get(
        self, path: bytes, default: Optional[T] = None
    ) -> Optional[Union[from_disk.Content, "BzrDirectory", T]]:
        """Return the entry at ``path``, or ``default`` if there is none."""
        # TODO move to swh.model.from_disk.Directory
        try:
            found = self[path]
        except KeyError:
            return default
        return found
def sort_changes(change: InventoryTreeChange) -> str:
    """Key function used to order changes by path.

    Sorting groups the folders together (for example "b", then "a/a", then
    "a/b"). When the `sorted()` call reverses that order, files come before
    their folder ("a/a", then "a") if the folder itself has changed. This
    avoids a bug where the operations would otherwise be applied as:

    - "a" goes from directory to file, removing all of its subtree
    - "a/a" is removed, but our structure has already forgotten it
    """
    old_path, new_path = change.path
    # Neither path can be the empty string
    if old_path:
        return old_path
    return new_path
class BazaarLoader(BaseLoader):
"""Loads a Bazaar repository"""
visit_type = "bzr"
def __init__(
self,
storage: StorageInterface,
url: str,
directory: Optional[str] = None,
- logging_class: str = "swh.loader.bzr.Loader",
visit_date: Optional[datetime] = None,
temp_directory: str = "/tmp",
clone_timeout_seconds: int = 7200,
- max_content_size: Optional[int] = None,
+ **kwargs: Any,
):
- super().__init__(
- storage=storage,
- logging_class=logging_class,
- max_content_size=max_content_size,
- )
+ super().__init__(storage=storage, origin_url=url, **kwargs)
self._temp_directory = temp_directory
self._clone_timeout = clone_timeout_seconds
self._revision_id_to_sha1git: Dict[BzrRevisionId, Sha1Git] = {}
self._last_root = BzrDirectory()
self._tags: Optional[Dict[bytes, BzrRevisionId]] = None
self._head_revision_id: Optional[bytes] = None
# Remember the previous revision to only compute the delta between
# revisions
self._prev_revision: Optional[BzrRevision] = None
self._branch: Optional[BzrBranch] = None
# Revisions that are pointed to, but don't exist in the current branch
# Rare, but exist usually for cross-VCS references.
self._ghosts: Set[BzrRevisionId] = set()
# Exists if in an incremental run, is the latest saved revision from
# this origin
self._latest_head: Optional[BzrRevisionId] = None
self._load_status = "eventful"
- self.origin_url = url
- self.visit_date = visit_date
+ self.visit_date = visit_date or self.visit_date
self.directory = directory
self.repo: Optional[repository.Repository] = None
def pre_cleanup(self) -> None:
    """Look for dangling data from earlier runs and clean it up.

    This is the loader's first step; it should do its best to avoid raising.
    """
    base_directory = self._temp_directory
    clean_dangling_folders(
        base_directory,
        pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
        log=self.log,
    )
- def prepare_origin_visit(self) -> None:
- """First step executed by the loader to prepare origin and visit
- references. Set/update self.origin, and
- optionally self.origin_url, self.visit_date.
-
- """
- self.origin = Origin(url=self.origin_url)
-
def prepare(self) -> None:
"""Second step executed by the loader to prepare some state needed by
the loader.
"""
- latest_snapshot = snapshot_get_latest(self.storage, self.origin_url)
+ latest_snapshot = snapshot_get_latest(self.storage, self.origin.url)
if latest_snapshot:
self._set_recorded_state(latest_snapshot)
def load_status(self) -> Dict[str, str]:
"""Detailed loading status.
Defaults to logging an eventful load.
Returns: a dictionary that is eventually passed back as the task's
result to the scheduler, allowing tuning of the task recurrence
mechanism.
"""
return {
"status": self._load_status,
}
def _set_recorded_state(self, latest_snapshot: Snapshot) -> None:
    """Remember the Bazaar head recorded by the previous visit, if any.

    On success ``self._latest_head`` is set to the Bazaar revision id that
    the ``trunk`` branch of ``latest_snapshot`` points to. It is left
    untouched (so every revision is loaded again) when that head cannot be
    determined.

    Args:
        latest_snapshot: the most recent snapshot stored for this origin.
    """
    if not latest_snapshot.branches:
        # Last snapshot was empty
        return
    head = latest_snapshot.branches.get(b"trunk")
    if head is None:
        # A snapshot can hold tags without a trunk (the trunk is only
        # recorded when the head is not the null revision).
        return
    extids = self._get_extids_for_targets([head.target])
    if not extids:
        # No ExtID known for the head, or it was dangling and got filtered
        # out: there is nothing to resume from.
        return
    self._latest_head = BzrRevisionId(extids[0].extid)
def _get_extids_for_targets(self, targets: List[Sha1Git]) -> List[ExtID]:
    """Get all Bzr ExtIDs for the targets in the latest snapshot.

    Every ExtID found is also recorded in the in-memory mapping from Bazaar
    revision id to SWH revision id.

    Args:
        targets: SWH revision ids to look up.

    Returns:
        The ExtIDs whose target revision is actually present in storage.
    """
    extids = []
    for extid in self.storage.extid_get_from_target(
        swhids.ObjectType.REVISION,
        targets,
        extid_type=EXTID_TYPE,
        extid_version=EXTID_VERSION,
    ):
        extids.append(extid)
        self._revision_id_to_sha1git[
            BzrRevisionId(extid.extid)
        ] = extid.target.object_id

    if extids:
        # Filter out dangling extids, we need to load their target again.
        # The result is materialized into a set so that it can be queried
        # repeatedly, even if storage hands back a one-shot iterable.
        revisions_missing = set(
            self.storage.revision_missing(
                [extid.target.object_id for extid in extids]
            )
        )
        extids = [
            extid
            for extid in extids
            if extid.target.object_id not in revisions_missing
        ]
    return extids
def cleanup(self) -> None:
if self.repo is not None:
self.repo.unlock()
def get_repo_and_branch(self) -> Tuple[repository.Repository, BzrBranch]:
    """Open ``self._repo_directory`` and return its repository and branch."""
    opened = bzrdir.BzrDir.open_containing_tree_branch_or_repository(
        self._repo_directory
    )
    # Only the second and third items of the 4-tuple are of interest here.
    _first, branch, repo, _last = opened
    return repo, branch
def run_upgrade(self):
    """Upgrade both repository and branch to the most recent supported
    version, so that they are compatible with the loader."""
    upgrade_command = cmd_upgrade()
    upgrade_command.run(self._repo_directory, clean=True)
def fetch_data(self) -> bool:
"""Fetch the data from the source the loader is currently loading
Returns:
a value that is interpreted as a boolean. If True, fetch_data needs
to be called again to complete loading.
"""
if not self.directory: # no local repository
self._repo_directory = mkdtemp(
prefix=TEMPORARY_DIR_PREFIX_PATTERN,
suffix=f"-{os.getpid()}",
dir=self._temp_directory,
)
msg = "Cloning '%s' to '%s' with timeout %s seconds"
self.log.debug(
- msg, self.origin_url, self._repo_directory, self._clone_timeout
+ msg, self.origin.url, self._repo_directory, self._clone_timeout
)
closure = partial(
cmd_branch().run,
- self.origin_url,
+ self.origin.url,
self._repo_directory,
no_tree=True,
use_existing_dir=True,
)
clone_with_timeout(
- self.origin_url, self._repo_directory, closure, self._clone_timeout
+ self.origin.url, self._repo_directory, closure, self._clone_timeout
)
else: # existing local repository
# Allow to load on disk repository without cloning
# for testing purpose.
self.log.debug("Using local directory '%s'", self.directory)
self._repo_directory = self.directory
repo, branch = self.get_repo_and_branch()
repository_format = repo._format.as_string() # lies about being a string
if not repository_format == expected_repository_format:
if repository_format in older_repository_formats:
self.log.debug(
"Upgrading repository from format '%s'",
repository_format.decode("ascii").strip("\n"),
)
self.run_upgrade()
repo, branch = self.get_repo_and_branch()
else:
raise UnknownRepositoryFormat()
if not branch.supports_tags():
# Some repos have the right format marker but their branches do not
# support tags
self.log.debug("Branch does not support tags, upgrading")
self.run_upgrade()
repo, branch = self.get_repo_and_branch()
# We could set the branch here directly, but we want to run the
# sanity checks in the `self.branch` property, so let's make sure
# we invalidate the "cache".
self._branch = None
self.repo = repo
self.repo.lock_read()
self.head_revision_id # set the property
self.tags # set the property
return False
def store_data(self) -> None:
"""Store fetched data in the database."""
assert self.repo is not None
assert self.tags is not None
# Insert revisions using a topological sorting
revs = self._get_bzr_revs_to_load()
if revs and revs[0] == NULL_REVISION:
# The first rev we load isn't necessarily `NULL_REVISION` even in a
# full load, as bzr allows for ghost revisions.
revs = revs[1:]
length_ingested_revs = 0
for rev in revs:
self.store_revision(self.repo.get_revision(rev))
length_ingested_revs += 1
if length_ingested_revs == 0:
# no new revision ingested, so uneventful
# still we'll make a snapshot, so we continue
self._load_status = "uneventful"
snapshot_branches: Dict[bytes, Optional[SnapshotBranch]] = {}
for tag_name, target in self.tags.items():
label = b"tags/%s" % tag_name
if target == NULL_REVISION:
# Some very rare repositories have meaningless tags that point
# to the null revision.
self.log.debug("Tag '%s' points to the null revision", tag_name)
snapshot_branches[label] = None
continue
try:
# Used only to detect corruption
self.branch.revision_id_to_dotted_revno(target)
except (
bzr_errors.NoSuchRevision,
bzr_errors.GhostRevisionsHaveNoRevno,
bzr_errors.UnsupportedOperation,
):
# Bad tag data/merges can lead to tagged revisions
# which are not in this branch. We cannot point a tag there.
snapshot_branches[label] = None
continue
snp_target = self._get_revision_id_from_bzr_id(target)
snapshot_branches[label] = SnapshotBranch(
target=self.store_release(tag_name, snp_target),
target_type=TargetType.RELEASE,
)
if self.head_revision_id != NULL_REVISION:
head_revision_git_hash = self._get_revision_id_from_bzr_id(
self.head_revision_id
)
snapshot_branches[b"trunk"] = SnapshotBranch(
target=head_revision_git_hash, target_type=TargetType.REVISION
)
snapshot_branches[b"HEAD"] = SnapshotBranch(
- target=b"trunk", target_type=TargetType.ALIAS,
+ target=b"trunk",
+ target_type=TargetType.ALIAS,
)
snapshot = Snapshot(branches=snapshot_branches)
self.storage.snapshot_add([snapshot])
self.flush()
self.loaded_snapshot_id = snapshot.id
def store_revision(self, bzr_rev: BzrRevision) -> None:
self.log.debug("Storing revision '%s'", bzr_rev.revision_id)
directory = self.store_directories(bzr_rev)
associated_bugs = [
(b"bug", b"%s %s" % (status.encode(), url.encode()))
for url, status in bzr_rev.iter_bugs()
]
extra_headers = [
- (b"time_offset_seconds", str(bzr_rev.timezone).encode(),),
+ (
+ b"time_offset_seconds",
+ str(bzr_rev.timezone).encode(),
+ ),
*associated_bugs,
]
timestamp = Timestamp(int(bzr_rev.timestamp), 0)
timezone = round(int(bzr_rev.timezone) / 60)
date = TimestampWithTimezone.from_numeric_offset(timestamp, timezone, False)
# TODO (how) should we store multiple authors? (T3887)
revision = Revision(
author=Person.from_fullname(bzr_rev.get_apparent_authors()[0].encode()),
date=date,
committer=Person.from_fullname(bzr_rev.committer.encode()),
committer_date=date,
type=RevisionType.BAZAAR,
directory=directory,
message=bzr_rev.message.encode(),
extra_headers=extra_headers,
synthetic=False,
parents=self._get_revision_parents(bzr_rev),
)
self._revision_id_to_sha1git[bzr_rev.revision_id] = revision.id
self.storage.revision_add([revision])
self.storage.extid_add(
[
ExtID(
extid_type=EXTID_TYPE,
extid_version=EXTID_VERSION,
extid=bzr_rev.revision_id,
target=revision.swhid(),
)
]
)
def store_directories(self, bzr_rev: BzrRevision) -> Sha1Git:
    """Store a revision's directories and return the id of its root.

    The in-memory tree of the previously stored revision is updated from the
    delta between both revisions whenever possible; otherwise the tree is
    rebuilt from the full inventory.
    """
    repo: repository.Repository = self.repo
    inventory: Inventory = repo.get_inventory(bzr_rev.revision_id)

    previous = self._prev_revision
    if previous is None:
        # Nothing to diff against yet.
        self._store_directories_slow(bzr_rev, inventory)
        return self._store_tree(bzr_rev)

    old_tree = self._get_revision_tree(previous.revision_id)
    new_tree = self._get_revision_tree(bzr_rev.revision_id)
    delta = new_tree.changes_from(old_tree)

    if delta.renamed or delta.copied:
        # Figuring out all nested and possibly conflicting renames is a lot
        # of effort for very few revisions, just go the slow way
        self._store_directories_slow(bzr_rev, inventory)
        return self._store_tree(bzr_rev)

    removals = sorted(
        delta.removed + delta.missing, key=sort_changes, reverse=True
    )
    for removal in removals:
        # empty directories will delete themselves in `self._last_root`
        if removal.kind[0] != "directory":
            old_path = removal.path[0]
            del self._last_root[old_path.encode()]

    # `delta.kind_changed` needs to happen before `delta.added` since a file
    # could be added under a node that changed from directory to file at the
    # same time, for example
    updates = itertools.chain(delta.kind_changed, delta.added, delta.modified)
    for update in updates:
        new_path = update.path[1]
        entry = inventory.get_entry(update.file_id)
        content = self.store_content(bzr_rev, new_path, entry)
        self._last_root[new_path.encode()] = content

    self._prev_revision = bzr_rev
    return self._store_tree(bzr_rev)
def store_release(self, name: bytes, target: Sha1Git) -> Sha1Git:
    """Create and store the release for a tag.

    The release carries no message, metadata or date, and its author is an
    empty person.

    Args:
        name: name of the release.
        target: sha1_git of the target revision.

    Returns:
        the sha1_git of the stored release.
    """
    anonymous = Person(name=None, email=None, fullname=b"")
    release = Release(
        name=name,
        target=target,
        target_type=ObjectType.REVISION,
        message=None,
        metadata=None,
        synthetic=False,
        author=anonymous,
        date=None,
    )
    self.storage.release_add([release])
    return release.id
def store_content(
    self, bzr_rev: BzrRevision, file_path: str, entry: InventoryEntry
) -> from_disk.Content:
    """Store the content of an inventory entry.

    Args:
        bzr_rev: revision the entry belongs to.
        file_path: path of the entry in that revision's tree.
        entry: the inventory entry itself.

    Returns:
        a `from_disk.Content` carrying the stored content's sha1_git and the
        permissions derived from the entry.
    """
    if entry.executable:
        # The executable bit wins over the kind of the entry.
        perms = from_disk.DentryPerms.executable_content
    else:
        perms_by_kind = {
            "directory": from_disk.DentryPerms.directory,
            "symlink": from_disk.DentryPerms.symlink,
            "file": from_disk.DentryPerms.content,
        }
        perms = perms_by_kind.get(entry.kind)
        if perms is None:  # pragma: no cover
            raise RuntimeError("Hit unreachable condition")

    # Entries without text are stored with empty data.
    data = b""
    if entry.has_text():
        tree = self._get_revision_tree(bzr_rev.revision_id)
        data = tree.get_file(file_path).read()
        assert len(data) == entry.text_size

    content = Content.from_data(data)
    self.storage.content_add([content])
    return from_disk.Content({"sha1_git": content.sha1_git, "perms": perms})
def _get_bzr_revs_to_load(self) -> List[BzrRevision]:
    """Return the topologically sorted revisions that still need loading.

    The items are what the ancestry graph yields (they are compared against
    revision ids and later passed to ``repo.get_revision``). When a previous
    head is known and still present, only what follows it in the sorted
    ancestry is returned. Ghost revisions are recorded in ``self._ghosts``
    and left out.
    """
    assert self.repo is not None
    repo: repository.Repository = self.repo
    self.log.debug("Getting fully sorted revision tree")
    if self.head_revision_id == NULL_REVISION:
        return []
    head_revision = repo.get_revision(self.head_revision_id)

    # bazaar's model doesn't allow it to iterate on its graph from
    # the bottom lazily, but basically all DAGs (especially bzr ones)
    # are small enough to fit in RAM.
    ancestry = []
    for rev, parents in self._iterate_ancestors(head_revision):
        if parents is not None:
            ancestry.append((rev, parents))
        else:
            # Filter out ghosts, they scare the `TopoSorter`.
            # Store them to later catch exceptions about missing parent
            # revision
            self._ghosts.add(rev)

    all_revisions = tsort.TopoSorter(ancestry).sorted()

    if self._latest_head is None:
        return all_revisions

    # Breezy does not offer a generic querying system, so we do the
    # filtering ourselves, which is simple enough given that bzr does
    # not have multiple heads per branch
    seen_latest_head = False
    new_revisions = []
    # Skip revisions up to and including the one we've already seen
    for rev in all_revisions:
        if seen_latest_head:
            new_revisions.append(rev)
        elif rev == self._latest_head:
            seen_latest_head = True

    if not seen_latest_head and all_revisions:
        # The previously saved head has been uncommitted, reload
        # everything
        msg = "Previous head (%s) not found, loading all revisions"
        self.log.debug(msg, self._latest_head)
        return all_revisions

    return new_revisions
def _iterate_ancestors(self, rev: BzrRevision) -> Iterator[BzrRevisionId]:
"""Return an iterator of this revision's ancestors"""
assert self.repo is not None
return self.repo.get_graph().iter_ancestry([rev.revision_id])
# We want to cache at most the current revision and the last, no need to
# take cache more than this.
@lru_cache(maxsize=2)
def _get_revision_tree(self, rev: BzrRevisionId) -> Tree:
assert self.repo is not None
return self.repo.revision_tree(rev)
def _store_tree(self, bzr_rev: BzrRevision) -> Sha1Git:
    """Save the current in-memory tree to storage.

    Every directory reachable from ``self._last_root`` is added to storage
    and ``bzr_rev`` becomes the new previous revision.

    Returns:
        the hash of the root directory.
    """
    pending: List[from_disk.Directory] = [self._last_root]
    while pending:
        current = pending.pop()
        self.storage.directory_add([current.to_model()])
        # Queue the sub-directories; other entries need no further handling
        # here.
        for child in current.values():
            if isinstance(child, from_disk.Directory):
                pending.append(child)

    self._prev_revision = bzr_rev
    return self._last_root.hash
def _store_directories_slow(
    self, bzr_rev: BzrRevision, inventory: Inventory
) -> None:
    """Rebuild the in-memory tree of a revision from its full inventory.

    This is the slow variant: instead of applying a diff from the last
    revision, it lists all the files. It is used for the first revision of a
    load (the null revision for a full run, the last recorded head for an
    incremental one) or for cases where the headaches of figuring out the
    delta from the breezy primitives is not worth it.
    """
    # Start from a fresh root: everything is listed anyway, and reusing the
    # last root could keep deleted files around
    fresh_root = BzrDirectory()
    self._last_root = fresh_root
    for entry_path, entry in inventory.iter_entries():
        if entry_path == "":
            # root repo is created by default
            continue
        stored = self.store_content(bzr_rev, entry_path, entry)
        fresh_root[entry_path.encode()] = stored
def _get_revision_parents(self, bzr_rev: BzrRevision) -> Tuple[Sha1Git, ...]:
    """Return the SWH revision ids of the parents of ``bzr_rev``.

    Null and ghost parents are left out.

    Raises:
        LookupError: if a parent that is not a known ghost cannot be
          resolved.
    """
    resolved = []
    for parent_id in bzr_rev.parent_ids:
        if parent_id == NULL_REVISION:
            # Paranoid, don't think that actually happens
            continue
        try:
            sha1_git = self._get_revision_id_from_bzr_id(parent_id)
        except LookupError:
            # We can't store ghosts in any meaningful way (yet?). They
            # have no contents by definition, and they're pretty rare,
            # so just ignore them. Anything else is a real error.
            if parent_id not in self._ghosts:
                raise
        else:
            resolved.append(sha1_git)
    return tuple(resolved)
def _get_revision_id_from_bzr_id(self, bzr_id: BzrRevisionId) -> Sha1Git:
    """Return the git sha1 of a revision given its bazaar revision id.

    Raises:
        LookupError: if storage does not hold exactly one match for the id.
    """
    cached = self._revision_id_to_sha1git.get(bzr_id)
    if cached is not None:
        return cached

    # The parent was not loaded in this run, get it from storage
    matches = self.storage.extid_get_from_extid(
        EXTID_TYPE, ids=[bzr_id], version=EXTID_VERSION
    )
    match_count = len(matches)
    if match_count == 1:
        return matches[0].target.object_id

    msg = "Expected 1 match from storage for bzr node %r, got %d"
    raise LookupError(msg % (bzr_id.hex(), match_count))
@property
def branch(self) -> BzrBranch:
"""Returns the only branch in the current repository.
Bazaar branches can be assimilated to repositories in other VCS like
Git or Mercurial. By contrast, a Bazaar repository is just a store of
revisions to optimize disk usage, with no particular semantics."""
assert self.repo is not None
branches = list(self.repo.find_branches(using=True))
msg = "Expected only 1 branch in the repository, got %d"
assert len(branches) == 1, msg % len(branches)
self._branch = branches[0]
return branches[0]
@property
def head_revision_id(self) -> BzrRevisionId:
    """Returns the Bazaar revision id of the branch's head.

    Bazaar/Breezy branches do not have multiple heads. The value is read
    from the branch once and then remembered.
    """
    assert self.repo is not None
    head = self._head_revision_id
    if head is None:
        head = self.branch.last_revision()
        self._head_revision_id = head
    assert head is not None
    return BzrRevisionId(head)
@property
def tags(self) -> Optional[Dict[bytes, BzrRevisionId]]:
    """Return the tags of the branch, keyed by their encoded name.

    The mapping is built from the branch's tag dict on first access and
    then kept in ``self._tags``.
    """
    assert self.repo is not None
    if self._tags is not None:
        return self._tags
    encoded: Dict[bytes, BzrRevisionId] = {}
    for tag_name, bzr_rev_id in self.branch.tags.get_tag_dict().items():
        encoded[tag_name.encode()] = bzr_rev_id
    self._tags = encoded
    return encoded
diff --git a/swh/loader/bzr/tests/test_loader.py b/swh/loader/bzr/tests/test_loader.py
index 399c9ec..c96f5aa 100644
--- a/swh/loader/bzr/tests/test_loader.py
+++ b/swh/loader/bzr/tests/test_loader.py
@@ -1,430 +1,430 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from pathlib import Path
from breezy.builtins import cmd_uncommit
import pytest
from swh.loader.bzr.loader import BazaarLoader, BzrDirectory
from swh.loader.tests import (
assert_last_visit_matches,
get_stats,
prepare_repository_from_archive,
)
from swh.model.from_disk import Content
from swh.model.hashutil import hash_to_bytes
from swh.storage.algos.snapshot import snapshot_get_latest
# Generated repositories:
# - needs-upgrade:
# - Repository needs upgrade
# - empty:
# - Empty repo
# - renames:
# - File rename
# - Directory renames
# - Directory renames *and* file rename conflicting
# - no-branch:
# - No branch
# - metadata-and-type-changes:
# - Directory removed
# - Kind changed (file to symlink, directory to file, etc.)
# - not changed_content and not renamed and not kind_changed (so, exec file?)
# - Executable file
# - Empty commit (bzr commit --unchanged)
# - ghosts
# - Ghost revisions
# - broken-tags
# - Tags corruption
# - does-not-support-tags
# - Repo is recent but branch does not support tags, needs to be upgraded
# TODO tests:
# - Root path listed in changes (does that even happen?)
# - Parent is :null (does that even happen?)
# - Case insensitive removal (Is it actually a problem?)
# - Truly corrupted revision?
# - No match from storage (wrong topo sort or broken rev)
def do_uncommit(repo_url):
    """Drop the most recent revision of the bzr repository at ``repo_url``.

    The command's output stream is pointed at the null device for the
    duration of the run.
    """
    command = cmd_uncommit()
    with open(os.devnull, "w") as devnull:
        command.outf = devnull
        command.run(repo_url)
@pytest.mark.parametrize("do_clone", [False, True])
def test_nominal(swh_storage, datadir, tmp_path, do_clone):
"""Load the ``nominal`` archive, with or without an explicit ``directory``,
and check the visit status, snapshot branches, object counts and the full
dict form of one revision."""
# NOTE(review): lines starting with "- " in the expected revision dict below
# are patch hunk markers (removed lines), not Python syntax.
archive_path = Path(datadir, "nominal.tgz")
repo_url = prepare_repository_from_archive(archive_path, "nominal", tmp_path)
if do_clone:
# Check that the cloning mechanism works
loader = BazaarLoader(swh_storage, repo_url)
else:
loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)
res = loader.load()
assert res == {"status": "eventful"}
assert_last_visit_matches(swh_storage, repo_url, status="full", type="bzr")
snapshot = snapshot_get_latest(swh_storage, repo_url)
expected_branches = [
b"HEAD",
b"tags/0.1",
b"tags/latest",
b"tags/other-tag",
b"trunk",
]
assert sorted(snapshot.branches.keys()) == expected_branches
stats = get_stats(swh_storage)
assert stats == {
"content": 7,
"directory": 7,
"origin": 1,
"origin_visit": 1,
"release": 3,
"revision": 6,
"skipped_content": 0,
"snapshot": 1,
}
# It contains associated bugs, making it a good complete candidate
example_revision = hash_to_bytes("18bb5b2c866c10c58a191afcd0b450a8727f1c62")
revision = loader.storage.revision_get([example_revision])[0]
assert revision.to_dict() == {
"message": b"fixing bugs",
"author": {
"fullname": b"Rapha\xc3\xabl Gom\xc3\xa8s ",
"name": b"Rapha\xc3\xabl Gom\xc3\xa8s",
"email": b"alphare@alphare-carbon.lan",
},
"committer": {
"fullname": b"Rapha\xc3\xabl Gom\xc3\xa8s ",
"name": b"Rapha\xc3\xabl Gom\xc3\xa8s",
"email": b"alphare@alphare-carbon.lan",
},
"date": {
"timestamp": {"seconds": 1643302390, "microseconds": 0},
- "offset": 60,
- "negative_utc": False,
"offset_bytes": b"+0100",
},
"committer_date": {
"timestamp": {"seconds": 1643302390, "microseconds": 0},
- "offset": 60,
- "negative_utc": False,
"offset_bytes": b"+0100",
},
"type": "bzr",
"directory": b"s0\xf3pe\xa3\x12\x05{\xc7\xbc\x86\xa6\x14.\xc1b\x1c\xeb\x05",
"synthetic": False,
"metadata": None,
"parents": (b"*V\xf5\n\xf0?\x1d{kE4\xda(\xb1\x08R\x83\x87-\xb6",),
"id": example_revision,
"extra_headers": (
(b"time_offset_seconds", b"3600"),
(b"bug", b"fixed https://launchpad.net/bugs/1234"),
(b"bug", b"fixed https://bz.example.com/?show_bug=4321"),
),
}
def test_needs_upgrade(swh_storage, datadir, tmp_path, mocker):
    """A repository in an old bzr format must trigger ``run_upgrade``"""
    repo_url = prepare_repository_from_archive(
        Path(datadir, "needs-upgrade.tgz"), "needs-upgrade", tmp_path
    )
    loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)
    # Watch the upgrade entry point without replacing its behavior
    spy = mocker.spy(loader, "run_upgrade")

    status = loader.load()

    spy.assert_called()
    # needs-upgrade is an empty repo
    assert status == {"status": "uneventful"}
def test_does_not_support_tags(swh_storage, datadir, tmp_path, mocker):
"""Repository format is correct, but the branch itself does not support tags
and should be upgraded to the latest format"""
archive_path = Path(datadir, "does-not-support-tags.tgz")
# The branch lives in a sub-directory of the extracted repository
path = "does-not-support-tags-repo/does-not-support-tags-branch"
# NOTE(review): the "-"/"+" prefixed lines below are patch hunk markers (the
# one-line call replaced by its reformatted multi-line form), not Python syntax.
- repo_url = prepare_repository_from_archive(archive_path, path, tmp_path,)
+ repo_url = prepare_repository_from_archive(
+ archive_path,
+ path,
+ tmp_path,
+ )
loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)
# Spy on run_upgrade so the test can check that loading called it
upgrade_spy = mocker.spy(loader, "run_upgrade")
res = loader.load()
upgrade_spy.assert_called()
assert res == {"status": "uneventful"} # does-not-support-tags is an empty repo
def test_no_branch(swh_storage, datadir, tmp_path):
    """A repository without any branch can only come from a broken clone, so
    loading it is expected to fail"""
    repo_url = prepare_repository_from_archive(
        Path(datadir, "no-branch.tgz"), "no-branch", tmp_path
    )
    loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)
    assert loader.load() == {"status": "failed"}
def test_empty(swh_storage, datadir, tmp_path):
    """Loading an empty repository is not an error, there is simply nothing
    to record"""
    repo_url = prepare_repository_from_archive(
        Path(datadir, "empty.tgz"), "empty", tmp_path
    )
    uneventful = {"status": "uneventful"}

    first_run = BazaarLoader(swh_storage, repo_url, directory=repo_url).load()
    assert first_run == uneventful

    # The incremental code is not bothered by an empty snapshot
    second_run = BazaarLoader(swh_storage, repo_url, directory=repo_url).load()
    assert second_run == uneventful
def test_renames(swh_storage, datadir, tmp_path):
    """Load the ``renames`` archive and check the snapshot branches and the
    object counts"""
    repo_url = prepare_repository_from_archive(
        Path(datadir, "renames.tgz"), "renames", tmp_path
    )
    loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)
    assert loader.load() == {"status": "eventful"}

    assert_last_visit_matches(swh_storage, repo_url, status="full", type="bzr")

    latest_snapshot = snapshot_get_latest(swh_storage, repo_url)
    branch_names = sorted(latest_snapshot.branches.keys())
    assert branch_names == [b"HEAD", b"trunk"]

    expected_counts = {
        "content": 1,
        "directory": 5,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 2,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(swh_storage) == expected_counts
def test_broken_tags(swh_storage, datadir, tmp_path):
    """A tag pointing to the null revision should not break anything"""
    repo_url = prepare_repository_from_archive(
        Path(datadir, "broken-tags.tgz"), "broken-tags", tmp_path
    )
    loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)
    assert loader.load() == {"status": "uneventful"}

    assert_last_visit_matches(swh_storage, repo_url, status="full", type="bzr")

    latest_snapshot = snapshot_get_latest(swh_storage, repo_url)
    branch_names = sorted(latest_snapshot.branches.keys())
    # The broken tag does appear, but didn't cause any issues
    assert branch_names == [b"tags/null-tag"]

    expected_counts = {
        "content": 0,
        "directory": 0,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,  # Does not count as a valid release
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(swh_storage) == expected_counts
def test_metadata_and_type_changes(swh_storage, datadir, tmp_path):
    """Load the ``metadata-and-type-changes`` archive and check the snapshot
    branches and the object counts"""
    repo_name = "metadata-and-type-changes"
    repo_url = prepare_repository_from_archive(
        Path(datadir, "metadata-and-type-changes.tgz"), repo_name, tmp_path
    )
    loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)
    assert loader.load() == {"status": "eventful"}

    assert_last_visit_matches(swh_storage, repo_url, status="full", type="bzr")

    latest_snapshot = snapshot_get_latest(swh_storage, repo_url)
    branch_names = sorted(latest_snapshot.branches.keys())
    assert branch_names == [b"HEAD", b"trunk"]

    expected_counts = {
        "content": 1,
        "directory": 9,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 7,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(swh_storage) == expected_counts
def test_ghosts(swh_storage, datadir, tmp_path):
    """Load the ``ghosts`` archive and check that the ghost revision is
    recorded by the loader but left out of the stored objects"""
    repo_url = prepare_repository_from_archive(
        Path(datadir, "ghosts.tgz"), "ghosts", tmp_path
    )
    loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)

    # No ghost is known before loading
    assert loader._ghosts == set()
    status = loader.load()
    assert loader._ghosts == {b"iamaghostboo"}
    assert status == {"status": "eventful"}

    assert_last_visit_matches(swh_storage, repo_url, status="full", type="bzr")

    latest_snapshot = snapshot_get_latest(swh_storage, repo_url)
    branch_names = sorted(latest_snapshot.branches.keys())
    expected_branches = [
        b"HEAD",
        b"tags/brokentag",  # tag pointing to a ghost revision is tracked
        b"trunk",
    ]
    assert branch_names == expected_branches

    expected_counts = {
        "content": 0,  # No contents
        "directory": 1,  # Root directory always counts
        "origin": 1,
        "origin_visit": 1,
        "release": 0,  # Ghost tag is ignored, stored as dangling
        "revision": 1,  # Only one revision, the ghost is ignored
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(swh_storage) == expected_counts
def test_bzr_directory():
    """Exercise nested-path assignment, lookup and deletion on
    ``BzrDirectory``"""
    nested_path = b"a/decently/enough/nested/path"
    entries = (
        (nested_path, b"whatever"),
        (b"a/decently/other_node", b"whatever else"),
        (b"another_node", b"contents"),
    )

    directory = BzrDirectory()
    for path, data in entries:
        directory[path] = Content(data)

    # Each lookup is compared against a freshly built Content
    for path, data in entries:
        assert directory[path] == Content(data)

    del directory[nested_path]

    removed_paths = (
        nested_path,
        b"a/decently/enough/nested/",
        b"a/decently/enough",
    )
    for path in removed_paths:
        assert directory.get(path) is None

    # The remaining nodes are still reachable: no KeyError
    for path in (b"a/decently", b"a", b"another_node"):
        directory[path]
def test_incremental_noop(swh_storage, datadir, tmp_path):
    """Loading the same repository twice in a row must do nothing the second
    time"""
    repo_url = prepare_repository_from_archive(
        Path(datadir, "nominal.tgz"), "nominal", tmp_path
    )

    first_loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)
    assert first_loader.load() == {"status": "eventful"}

    second_loader = BazaarLoader(swh_storage, repo_url, directory=repo_url)
    assert second_loader.load() == {"status": "uneventful"}
def test_incremental_nominal(swh_storage, datadir, tmp_path):
    """An updated repository must be updated by the second run, while the
    third run must be a noop."""
    archive_path = Path(datadir, "nominal.tgz")
    repo_url = prepare_repository_from_archive(archive_path, "nominal", tmp_path)

    # Strip the 2 latest commits before the first run
    for _ in range(2):
        do_uncommit(repo_url)

    first_run = BazaarLoader(swh_storage, repo_url, directory=repo_url).load()
    assert first_run == {"status": "eventful"}
    partial_stats = {
        "content": 6,
        "directory": 4,
        "origin": 1,
        "origin_visit": 1,
        "release": 2,
        "revision": 4,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(swh_storage) == partial_stats

    # Extract the archive again to load the complete repo now
    repo_url = prepare_repository_from_archive(archive_path, "nominal", tmp_path)
    second_run = BazaarLoader(swh_storage, repo_url, directory=repo_url).load()
    assert second_run == {"status": "eventful"}
    expected_stats = {
        "content": 7,
        "directory": 7,
        "origin": 1,
        "origin_visit": 2,
        "release": 3,
        "revision": 6,
        "skipped_content": 0,
        "snapshot": 2,
    }
    assert get_stats(swh_storage) == expected_stats

    # Nothing should change besides the extra visit
    third_run = BazaarLoader(swh_storage, repo_url, directory=repo_url).load()
    assert third_run == {"status": "uneventful"}
    after_noop = dict(expected_stats)
    after_noop["origin_visit"] = 2 + 1
    assert get_stats(swh_storage) == after_noop
def test_incremental_uncommitted_head(swh_storage, datadir, tmp_path):
    """An incremental run whose previously saved head has gone missing must
    not error out, and must still load everything correctly"""
    repo_url = prepare_repository_from_archive(
        Path(datadir, "nominal.tgz"), "nominal", tmp_path
    )

    first_run = BazaarLoader(swh_storage, repo_url, directory=repo_url).load()
    assert first_run == {"status": "eventful"}
    expected_stats = {
        "content": 7,
        "directory": 7,
        "origin": 1,
        "origin_visit": 1,
        "release": 3,
        "revision": 6,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(swh_storage) == expected_stats

    # Get rid of the head that the first run saved
    do_uncommit(repo_url)

    second_run = BazaarLoader(swh_storage, repo_url, directory=repo_url).load()
    assert second_run == {"status": "eventful"}

    # Everything is loaded correctly: one more visit and one more snapshot
    after_uncommit = dict(expected_stats)
    after_uncommit["origin_visit"] = 1 + 1
    after_uncommit["snapshot"] = 1 + 1
    assert get_stats(swh_storage) == after_uncommit
diff --git a/swh/loader/bzr/tests/test_tasks.py b/swh/loader/bzr/tests/test_tasks.py
index aa2330f..7e7158d 100644
--- a/swh/loader/bzr/tests/test_tasks.py
+++ b/swh/loader/bzr/tests/test_tasks.py
@@ -1,23 +1,27 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def test_loader(
mocker, swh_config, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
"""Send the ``LoadBazaar`` celery task with ``BazaarLoader.load`` patched
out, then check that the task succeeds, returns the mocked result and that
``load`` was called exactly once without arguments."""
# Patch the load method so no actual loading takes place
mock_loader = mocker.patch("swh.loader.bzr.loader.BazaarLoader.load")
mock_loader.return_value = {"status": "eventful"}
# NOTE(review): the "-"/"+" prefixed lines below are patch hunk markers (the
# one-line kwargs replaced by the reformatted multi-line form), not Python syntax.
res = swh_scheduler_celery_app.send_task(
"swh.loader.bzr.tasks.LoadBazaar",
- kwargs={"url": "origin_url", "directory": "/some/repo", "visit_date": "now",},
+ kwargs={
+ "url": "origin_url",
+ "directory": "/some/repo",
+ "visit_date": "now",
+ },
)
assert res
# Block until the task has finished before inspecting its outcome
res.wait()
assert res.successful()
assert res.result == {"status": "eventful"}
mock_loader.assert_called_once_with()
diff --git a/tox.ini b/tox.ini
index af808eb..7882424 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,73 +1,74 @@
[tox]
envlist=black,flake8,mypy,py3
[testenv]
extras =
testing
deps =
pytest-cov
commands =
pytest --doctest-modules \
{envsitepackagesdir}/swh/loader/bzr \
--cov={envsitepackagesdir}/swh/loader/bzr \
--cov-branch {posargs}
[testenv:black]
skip_install = true
deps =
- black==19.10b0
+ black==22.3.0
commands =
{envpython} -m black --check swh
[testenv:flake8]
skip_install = true
deps =
- flake8
+ flake8==4.0.1
+ flake8-bugbear==22.3.23
commands =
{envpython} -m flake8
[testenv:mypy]
extras =
testing
deps =
mypy==0.920
commands =
mypy swh
# build documentation outside swh-environment using the current
# git HEAD of swh-docs, is executed on CI for each diff to prevent
# breaking doc build
[testenv:sphinx]
whitelist_externals = make
usedevelop = true
extras =
testing
deps =
# fetch and install swh-docs in develop mode
-e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
SPHINXOPTS = -W
commands =
make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs
# build documentation only inside swh-environment using local state
# of swh-docs package
[testenv:sphinx-dev]
whitelist_externals = make
usedevelop = true
extras =
testing
deps =
# install swh-docs in develop mode
-e ../swh-docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
SPHINXOPTS = -W
commands =
make -I ../.tox/sphinx-dev/src/swh-docs/swh/ -C docs