diff --git a/swh/storage/cassandra/common.py b/swh/storage/cassandra/common.py --- a/swh/storage/cassandra/common.py +++ b/swh/storage/cassandra/common.py @@ -13,4 +13,4 @@ def hash_url(url: str) -> bytes: - return hashlib.sha1(url.encode("ascii")).digest() + return hashlib.sha1(url.encode("utf8")).digest() diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ b/swh/storage/tests/storage_data.py @@ -456,6 +456,7 @@ Origin(url="https://gitlab.com/user1/repo1"), Origin(url="https://gitlab.com/user2/repo1"), Origin(url="https://forge.softwareheritage.org/source/repo1"), + Origin(url="https://example.рф/🏛️.txt"), ) origin, origin2 = origins[:2] diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py --- a/swh/storage/tests/storage_tests.py +++ b/swh/storage/tests/storage_tests.py @@ -1412,23 +1412,23 @@ } def test_origin_add(self, swh_storage, sample_data): - origins = list(sample_data.origins[:2]) + origins = list(sample_data.origins) origin_urls = [o.url for o in origins] - assert swh_storage.origin_get(origin_urls) == [None, None] + assert swh_storage.origin_get(origin_urls) == [None] * len(origins) stats = swh_storage.origin_add(origins) - assert stats == {"origin:add": 2} + assert stats == {"origin:add": len(origin_urls)} actual_origins = swh_storage.origin_get(origin_urls) assert actual_origins == origins assert set(swh_storage.journal_writer.journal.objects) == set( - [("origin", origins[0]), ("origin", origins[1]),] + [("origin", origin) for origin in origins] ) swh_storage.refresh_stat_counters() - assert swh_storage.stat_counters()["origin"] == 2 + assert swh_storage.stat_counters()["origin"] == len(origins) def test_origin_add_twice(self, swh_storage, sample_data): origin, origin2 = sample_data.origins[:2] @@ -4431,6 +4431,10 @@ swh_storage.origin_add([Origin(url=obj.origin)]) visit = OriginVisit(origin=obj.origin, date=obj.date, type=obj.type,) swh_storage.origin_visit_add([visit]) + elif obj.object_type == "raw_extrinsic_metadata": + swh_storage.metadata_authority_add([obj.authority]) + swh_storage.metadata_fetcher_add([obj.fetcher]) + swh_storage.raw_extrinsic_metadata_add([obj]) else: method = getattr(swh_storage, obj_type + "_add") try: