On some on-disk repositories, the loader fails with an error when it tries to retrieve an object that does not exist.
Steps to reproduce, using a local storage and the latest swh-loader-git:
Use the repository located at `/srv/storage/space/mirrors/gitorious.org/mnt/repositories/fe6/441/641fb6e08ddb2e4fd096dcf18e80b894bf.git`:
```
repo = '641fb6e08ddb2e4fd096dcf18e80b894bf.git'
origin_url = 'http://foo/bar/git'

import logging
logging.basicConfig(level=logging.DEBUG)

from swh.loader.git.tasks import LoadDiskGitRepository

t = LoadDiskGitRepository()
t.run(origin_url=origin_url, directory=repo, date='2016-05-03T15:16:32+00:00')
```
output:
```
>>> repo = '641fb6e08ddb2e4fd096dcf18e80b894bf.git'
>>> origin_url = 'http://foo/bar/git'
>>>
>>> import logging
>>> logging.basicConfig(level=logging.DEBUG)
>>>
>>> from swh.loader.git.tasks import LoadDiskGitRepository
>>>
>>> t = LoadDiskGitRepository()
>>> t.run(origin_url=origin_url, directory=repo, date='2016-05-03T15:16:32+00:00')
DEBUG:swh.scheduler.task.LoadDiskGitRepository:Creating git origin for http://foo/bar/git
DEBUG:swh.scheduler.task.LoadDiskGitRepository:Done creating git origin for http://foo/bar/git
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-scheduler/swh/scheduler/task.py", line 35, in run
    raise e from None
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-scheduler/swh/scheduler/task.py", line 32, in run
    result = self.run_task(*args, **kwargs)
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-loader-git/swh/loader/git/tasks.py", line 39, in run_task
    return loader.load(origin_url, directory, dateutil.parser.parse(date))
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-loader-git/swh/loader/git/base.py", line 422, in load
    self.fetch_data()
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-loader-git/swh/loader/git/loader.py", line 48, in fetch_data
    type_name = self.repo[oid].type_name
  File "/usr/lib/python3/dist-packages/dulwich/repo.py", line 474, in __getitem__
    return self.object_store[self.refs[name]]
  File "/usr/lib/python3/dist-packages/dulwich/refs.py", line 244, in __getitem__
    raise KeyError(name)
KeyError: b'f2bd56289283900cd1dcf2c52b93952f98e45144.deleted'
```