This error seems to be related to a parsing error on a legacy (loose) object within the dulwich dependency: dulwich's `_is_legacy_object` check fails when reading the object file.
To reproduce, run the following snippet against the repository /srv/storage/space/mirrors/gitorious.org/mnt/repositories/sortix/sortix-gitorious-wiki.git:
```
repo = 'sortix-gitorious-wiki.git'
origin_url = 'http://foo/bar/git'

import logging
logging.basicConfig(level=logging.DEBUG)

from swh.loader.git.tasks import LoadDiskGitRepository

t = LoadDiskGitRepository()
t.run(origin_url=origin_url, directory=repo, date='2016-05-03T15:16:32+00:00')
```
output:
```
DEBUG:swh.scheduler.task.LoadDiskGitRepository:Creating git origin for http://foo/bar/sortix-gitorious-wiki.git
DEBUG:swh.scheduler.task.LoadDiskGitRepository:Done creating git origin for http://foo/bar/sortix-gitorious-wiki.git
Traceback (most recent call last):
  File "./load-git-disk.py", line 20, in <module>
    main()
  File "/usr/lib/python3/dist-packages/click/core.py", line 716, in __call__
    return self.main(*args, **kwargs)
  File "/usr/lib/python3/dist-packages/click/core.py", line 696, in main
    rv = self.invoke(ctx)
  File "/usr/lib/python3/dist-packages/click/core.py", line 889, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/lib/python3/dist-packages/click/core.py", line 534, in invoke
    return callback(*args, **kwargs)
  File "./load-git-disk.py", line 17, in main
    t.run(origin_url=origin_url, directory=repo, date='2016-05-03T15:16:32+00:00')
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-scheduler/swh/scheduler/task.py", line 35, in run
    raise e from None
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-scheduler/swh/scheduler/task.py", line 32, in run
    result = self.run_task(*args, **kwargs)
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-loader-git/swh/loader/git/tasks.py", line 39, in run_task
    return loader.load(origin_url, directory, dateutil.parser.parse(date))
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-loader-git/swh/loader/git/base.py", line 422, in load
    self.fetch_data()
  File "/home/tony/work/inria/repo/swh/swh-environment/swh-loader-git/swh/loader/git/loader.py", line 48, in fetch_data
    type_name = self.repo[oid].type_name
  File "/usr/lib/python3/dist-packages/dulwich/repo.py", line 470, in __getitem__
    return self.object_store[name]
  File "/usr/lib/python3/dist-packages/dulwich/object_store.py", line 118, in __getitem__
    type_num, uncomp = self.get_raw(sha)
  File "/usr/lib/python3/dist-packages/dulwich/object_store.py", line 372, in get_raw
    ret = self._get_loose_object(hexsha)
  File "/usr/lib/python3/dist-packages/dulwich/object_store.py", line 521, in _get_loose_object
    return ShaFile.from_path(path)
  File "/usr/lib/python3/dist-packages/dulwich/objects.py", line 370, in from_path
    return cls.from_file(f)
  File "/usr/lib/python3/dist-packages/dulwich/objects.py", line 376, in from_file
    obj = cls._parse_file(f)
  File "/usr/lib/python3/dist-packages/dulwich/objects.py", line 346, in _parse_file
    if cls._is_legacy_object(map):
  File "/usr/lib/python3/dist-packages/dulwich/objects.py", line 338, in _is_legacy_object
    b0 = ord(magic[0:1])
TypeError: ord() expected a character, but string of length 0 found
```
Note: load-git-disk.py is a wrapper script around the scenario described above (cf. P185).