diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -164,7 +164,7 @@
 
         """
         result = {
-            'id': rev['id'].decode(),
+            'id': rev['id'],
             'indexer_configuration_id': self.tool['id'],
             'translated_metadata': None
         }
@@ -287,12 +287,12 @@
 
         Args:
-            * `origin_head` (dict): {str(origin_id): rev_id.encode()}
+            * `origin_head` (dict): {str(origin_id): rev_id}
               keys `origin_id` and `revision_id`, which is the result
               of OriginHeadIndexer.
             * `policy_update`: `'ignore-dups'` or `'update-dups'`
 
         """
-        origin_head_map = {int(origin_id): rev_id
+        origin_head_map = {int(origin_id): hashutil.hash_to_bytes(rev_id)
                            for (origin_id, rev_id) in origin_head.items()}
 
         # Fix up the argument order. revisions_metadata has to be the
diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py
--- a/swh/indexer/origin_head.py
+++ b/swh/indexer/origin_head.py
@@ -11,6 +11,8 @@
 from swh.scheduler.utils import create_task_dict
 from swh.indexer.indexer import OriginIndexer
 
+from swh.model.hashutil import hash_to_hex
+
 
 class OriginHeadIndexer(OriginIndexer):
     """Origin-level indexer.
@@ -69,7 +71,7 @@
                 'oneshot',
                 origin_head={
                     str(result['origin_id']):
-                        result['revision_id'].decode()
+                        hash_to_hex(result['revision_id'])
                     for result in results},
                 policy_update='update-dups',
             )
@@ -80,7 +82,7 @@
             task = create_task_dict(
                 revision_metadata_task,
                 'oneshot',
-                ids=[res['revision_id'].decode() for res in results],
+                ids=[hash_to_hex(res['revision_id']) for res in results],
                 policy_update='update-dups',
                 next_step=sub_task,
             )
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -364,7 +364,7 @@
             regconfig=regconfig,
             tsquery_template=tsquery_template)
         cur.execute(query, tsquery_args + [limit])
-        yield from cur
+        yield from cursor_to_bytes(cur)
 
 indexer_configuration_cols = ['id', 'tool_name', 'tool_version',
                               'tool_configuration']
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -14,6 +14,8 @@
 from swh.indexer.tests.test_utils import MockObjStorage, MockStorage
 from swh.indexer.tests.test_utils import MockIndexerStorage
 
+from swh.model.hashutil import hash_to_bytes
+
 
 class ContentMetadataTestIndexer(ContentMetadataIndexer):
     """Specific Metadata whose configuration is enough to satisfy the
@@ -448,14 +450,14 @@
         metadata_indexer = RevisionMetadataTestIndexer()
 
         sha1_gits = [
-            b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
+            hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
         ]
         metadata_indexer.run(sha1_gits, 'update-dups')
 
         results = metadata_indexer.idx_storage.added_data
 
         expected_results = [('revision_metadata', True, [{
-            'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
+            'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
             'translated_metadata': {
                 '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
                 'url':
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -16,6 +16,8 @@
 
 from swh.scheduler.tests.scheduler_testing import SchedulerTestFixture
 
+from swh.model.hashutil import hash_to_bytes
+
 
 class OriginMetadataTestIndexer(OriginMetadataIndexer):
     def prepare(self):
@@ -104,13 +106,14 @@
             'keywords': ['yarn', 'parse', 'lock', 'dependencies'],
         }
         rev_metadata = {
-            'id': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
+            'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
             'translated_metadata': metadata,
             'indexer_configuration_id': 7,
         }
         origin_metadata = {
             'origin_id': 54974445,
-            'from_revision': '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
+            'from_revision': hash_to_bytes(
+                '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
             'metadata': metadata,
             'indexer_configuration_id': 7,
         }
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -5,6 +5,7 @@
 
 from swh.objstorage.exc import ObjNotFoundError
 from swh.model import hashutil
+from swh.model.hashutil import hash_to_bytes
 
 ORIGINS = [
     {
@@ -119,7 +120,8 @@
     54974445: {
         'branches': {
             b'HEAD': {
-                'target': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
+                'target': hash_to_bytes(
+                    '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
                 'target_type': 'revision'}}}
 }
 
@@ -320,10 +322,12 @@
         self.added_data.append(
             ('revision_metadata', conflict_update, metadata))
         for item in metadata:
+            assert isinstance(item['id'], bytes)
             self.revision_metadata.setdefault(item['id'], []).append(item)
 
     def revision_metadata_get(self, ids):
         for id_ in ids:
+            assert isinstance(id_, bytes)
             yield from self.revision_metadata.get(id_)
 
     def origin_intrinsic_metadata_add(self, metadata, conflict_update=None):
@@ -387,7 +391,7 @@
 
     def revision_get(self, revisions):
         return [{
-            'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
+            'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
             'committer': {
                 'id': 26,
                 'name': b'Andrew Nesbitt',
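
A quick sketch of the convention this patch settles on (hex strings in task
arguments, bytes wherever the indexer storage is involved), using only the
swh.model.hashutil converters imported above. The origin_head dict below is a
made-up example mirroring the origin_head_map conversion, not code taken from
the patch:

    from swh.model.hashutil import hash_to_bytes, hash_to_hex

    rev_id_hex = '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'  # task/JSON side
    rev_id_bytes = hash_to_bytes(rev_id_hex)                 # storage side

    # sha1_git identifiers are 20 bytes; the conversion round-trips losslessly.
    assert isinstance(rev_id_bytes, bytes) and len(rev_id_bytes) == 20
    assert hash_to_hex(rev_id_bytes) == rev_id_hex

    # Same shape as the origin_head_map built above: hex ids received from the
    # scheduler are mapped back to bytes before any storage call.
    origin_head = {'54974445': rev_id_hex}
    origin_head_map = {int(origin_id): hash_to_bytes(rev_id)
                       for (origin_id, rev_id) in origin_head.items()}
    assert origin_head_map == {54974445: rev_id_bytes}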