diff --git a/swh/loader/mercurial/bundle20_reader.py b/swh/loader/mercurial/bundle20_reader.py --- a/swh/loader/mercurial/bundle20_reader.py +++ b/swh/loader/mercurial/bundle20_reader.py @@ -323,7 +323,7 @@ commit['time'] = datetime.fromtimestamp(float(tstamp)) commit['time_offset_seconds'] = int(tz) if extra: - commit['extra'] = extra[0] + commit['extra'] = b' '.join(extra) commit['changed_files'] = parts[3:] return commit diff --git a/swh/loader/mercurial/loader.py b/swh/loader/mercurial/loader.py --- a/swh/loader/mercurial/loader.py +++ b/swh/loader/mercurial/loader.py @@ -376,6 +376,10 @@ for e in extra.split(b'\x00'): k, v = e.split(b':', 1) k = k.decode('utf-8') + # transplant_source stores binary reference to a changeset + # prefer to dump hexadecimal one in the revision metadata + if k == 'transplant_source': + v = hash_to_hex(v) extra_meta.append([k, v]) revision = { diff --git a/swh/loader/mercurial/tests/resources/transplant.tgz b/swh/loader/mercurial/tests/resources/transplant.tgz new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ 0) + self.assertTrue(len(transplant_sources) > 0) + self.assertTrue(transplant_sources.issubset(hg_changesets))