diff --git a/swh/clearlydefined/mapping_utils.py b/swh/clearlydefined/mapping_utils.py --- a/swh/clearlydefined/mapping_utils.py +++ b/swh/clearlydefined/mapping_utils.py @@ -17,8 +17,8 @@ ToolNotFound, ToolNotSupported, ) -from swh.model.hashutil import hash_to_bytes, hash_to_hex -from swh.model.identifiers import ExtendedSWHID +from swh.model.hashutil import hash_to_bytes +from swh.model.identifiers import ExtendedSWHID, ExtendedObjectType from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, @@ -65,7 +65,7 @@ def map_row_data_with_metadata( - swh_id: str, + target: ExtendedSWHID, origin: Optional[Origin], metadata: Dict, date: datetime, @@ -77,7 +77,7 @@ swh storage """ return RawExtrinsicMetadata( - target=ExtendedSWHID.from_string(swh_id), + target=target, discovery_date=date, authority=AUTHORITY, fetcher=FETCHER, @@ -87,7 +87,7 @@ ) -def map_sha1_with_swhid(storage, sha1: str) -> Optional[str]: +def map_sha1_with_swhid(storage, sha1: str) -> Optional[ExtendedSWHID]: """ Take sha1 and storage as input and give the corresponding swhID for that sha1 @@ -97,9 +97,9 @@ content = storage.content_get([hash_to_bytes(sha1)])[0] if not content: return None - sha1_git = hash_to_hex(content.sha1_git) - swh_id = "swh:1:cnt:{sha1_git}".format(sha1_git=sha1_git) - return swh_id + return ExtendedSWHID( + object_type=ExtendedObjectType.CONTENT, object_id=content.sha1_git + ) def sha1_git_in_revisions(storage, sha1_git: str) -> bool: @@ -132,11 +132,11 @@ """ if sha1: assert isinstance(sha1, str) - swh_id = map_sha1_with_swhid(storage=storage, sha1=sha1) - if swh_id: + swhid = map_sha1_with_swhid(storage=storage, sha1=sha1) + if swhid: data.append( map_row_data_with_metadata( - swh_id=swh_id, + target=swhid, origin=None, metadata=file, date=date, @@ -259,14 +259,16 @@ return MappingStatus.IGNORE, [] if not sha1_git_in_revisions(sha1_git=sha1_git, storage=storage): return MappingStatus.UNMAPPED, [] - swh_id = "swh:1:rev:{sha1_git}".format(sha1_git=sha1_git) + swhid = ExtendedSWHID( + object_type=ExtendedObjectType.REVISION, object_id=hash_to_bytes(sha1_git) + ) else: return MappingStatus.IGNORE, [] return MappingStatus.MAPPED, [ map_row_data_with_metadata( - swh_id=swh_id, + target=swhid, origin=origin, metadata=metadata, date=date, diff --git a/swh/clearlydefined/tests/test_mapping_utils.py b/swh/clearlydefined/tests/test_mapping_utils.py --- a/swh/clearlydefined/tests/test_mapping_utils.py +++ b/swh/clearlydefined/tests/test_mapping_utils.py @@ -156,8 +156,8 @@ def test_mapping_sha1_with_swhID(swh_storage): add_content_data(swh_storage) sha1 = "34973274ccef6ab4dfaaf86599792fa9c3fe4689" - assert "swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" == map_sha1_with_swhid( - sha1=sha1, storage=swh_storage + assert "swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" == str( + map_sha1_with_swhid(sha1=sha1, storage=swh_storage) )