Changeset View
Changeset View
Standalone View
Standalone View
swh/clearlydefined/tests/test_mapping_utils.py
Show All 10 Lines | |||||
import pytest | import pytest | ||||
from swh.clearlydefined.error import ( | from swh.clearlydefined.error import ( | ||||
InvalidComponents, | InvalidComponents, | ||||
NoJsonExtension, | NoJsonExtension, | ||||
RevisionNotFound, | RevisionNotFound, | ||||
ToolNotFound, | ToolNotFound, | ||||
ToolNotSupported, | ToolNotSupported, | ||||
WrongMetadata, | |||||
) | ) | ||||
from swh.clearlydefined.mapping_utils import ( | from swh.clearlydefined.mapping_utils import ( | ||||
AUTHORITY, | AUTHORITY, | ||||
FETCHER, | FETCHER, | ||||
MappingStatus, | |||||
map_definition, | map_definition, | ||||
map_row, | map_row, | ||||
map_sha1_with_swhid, | map_sha1_with_swhid, | ||||
) | ) | ||||
from swh.model import from_disk | from swh.model import from_disk | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.identifiers import parse_swhid | from swh.model.identifiers import parse_swhid | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
▲ Show 20 Lines • Show All 137 Lines • ▼ Show 20 Lines | |||||
def test_mapping_with_wrong_sha1(swh_storage): | def test_mapping_with_wrong_sha1(swh_storage): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
sha1 = "6ac599151a7aaa8ca5d38dc5bb61b49193a3cadc1ed33de5a57e4d1ecc53c846" | sha1 = "6ac599151a7aaa8ca5d38dc5bb61b49193a3cadc1ed33de5a57e4d1ecc53c846" | ||||
assert map_sha1_with_swhid(sha1=sha1, storage=swh_storage) is None | assert map_sha1_with_swhid(sha1=sha1, storage=swh_storage) is None | ||||
def test_map_row_for_definitions_with_sha1(swh_storage, datadir): | def test_map_row_for_definitions_with_no_sha1_sha1git(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
expected = ( | expected = MappingStatus.UNMAPPED, [] | ||||
True, | |||||
[ | |||||
RawExtrinsicMetadata( | |||||
type=MetadataTargetType.CONTENT, | |||||
target=parse_swhid( | |||||
"swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | |||||
), | |||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
authority=AUTHORITY, | |||||
fetcher=FETCHER, | |||||
format="clearlydefined-definition-json", | |||||
origin="http://central.maven.org/maven2/za/co/absa/cobrix/cobol-parser/" | |||||
"0.4.0/cobol-parser-0.4.0-sources.jar", | |||||
metadata=json.dumps( | |||||
json.loads(file_data(os.path.join(datadir, "definitions.json"))) | |||||
).encode("utf-8"), | |||||
), | |||||
], | |||||
) | |||||
assert ( | assert ( | ||||
map_row( | map_row( | ||||
storage=swh_storage, | storage=swh_storage, | ||||
id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.json", | id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.json", | ||||
metadata=gzip.compress( | metadata=gzip.compress( | ||||
file_data(os.path.join(datadir, "definitions.json")).encode() | file_data( | ||||
os.path.join(datadir, "def_with_no_sha1_and_sha1git.json") | |||||
).encode() | |||||
), | ), | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
) | ) | ||||
== expected | == expected | ||||
) | ) | ||||
def test_map_row_for_definitions_with_gitsha1(swh_storage, datadir): | def test_map_row_for_definitions_with_gitsha1(swh_storage, datadir): | ||||
add_revision_data(swh_storage) | add_revision_data(swh_storage) | ||||
expected = ( | expected = ( | ||||
True, | MappingStatus.MAPPED, | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
target=parse_swhid( | target=parse_swhid( | ||||
"swh:1:rev:4c66129b968ab8122964823d1d77677f50884cf6" | "swh:1:rev:4c66129b968ab8122964823d1d77677f50884cf6" | ||||
), | ), | ||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
authority=AUTHORITY, | authority=AUTHORITY, | ||||
Show All 20 Lines | assert ( | ||||
) | ) | ||||
== expected | == expected | ||||
) | ) | ||||
def test_map_row_for_scancode(swh_storage, datadir): | def test_map_row_for_scancode(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
expected = ( | expected = ( | ||||
False, | MappingStatus.UNMAPPED, | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.CONTENT, | type=MetadataTargetType.CONTENT, | ||||
target=parse_swhid( | target=parse_swhid( | ||||
"swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | "swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | ||||
), | ), | ||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
authority=AUTHORITY, | authority=AUTHORITY, | ||||
Show All 19 Lines | assert ( | ||||
) | ) | ||||
== expected | == expected | ||||
) | ) | ||||
def test_map_row_for_scancode_true_mapping_status(swh_storage, datadir): | def test_map_row_for_scancode_true_mapping_status(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
expected = ( | expected = ( | ||||
True, | MappingStatus.MAPPED, | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.CONTENT, | type=MetadataTargetType.CONTENT, | ||||
target=parse_swhid( | target=parse_swhid( | ||||
"swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | "swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | ||||
), | ), | ||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
authority=AUTHORITY, | authority=AUTHORITY, | ||||
Show All 19 Lines | assert ( | ||||
) | ) | ||||
== expected | == expected | ||||
) | ) | ||||
def test_map_row_for_licensee(swh_storage, datadir): | def test_map_row_for_licensee(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
expected = ( | expected = ( | ||||
False, | MappingStatus.UNMAPPED, | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.CONTENT, | type=MetadataTargetType.CONTENT, | ||||
target=parse_swhid( | target=parse_swhid( | ||||
"swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | "swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | ||||
), | ), | ||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
authority=AUTHORITY, | authority=AUTHORITY, | ||||
Show All 20 Lines | assert ( | ||||
) | ) | ||||
== expected | == expected | ||||
) | ) | ||||
def test_map_row_for_licensee_true_mapping_status(swh_storage, datadir): | def test_map_row_for_licensee_true_mapping_status(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
expected = ( | expected = ( | ||||
True, | MappingStatus.MAPPED, | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.CONTENT, | type=MetadataTargetType.CONTENT, | ||||
target=parse_swhid( | target=parse_swhid( | ||||
"swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | "swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | ||||
), | ), | ||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
authority=AUTHORITY, | authority=AUTHORITY, | ||||
Show All 20 Lines | assert ( | ||||
) | ) | ||||
== expected | == expected | ||||
) | ) | ||||
def test_map_row_for_clearlydefined(swh_storage, datadir): | def test_map_row_for_clearlydefined(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
expected = ( | expected = ( | ||||
False, | MappingStatus.UNMAPPED, | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.CONTENT, | type=MetadataTargetType.CONTENT, | ||||
target=parse_swhid( | target=parse_swhid( | ||||
"swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | "swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | ||||
), | ), | ||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
authority=AUTHORITY, | authority=AUTHORITY, | ||||
Show All 38 Lines | assert ( | ||||
) | ) | ||||
== expected | == expected | ||||
) | ) | ||||
def test_map_row_for_clearlydefined_true_mapping_status(swh_storage, datadir): | def test_map_row_for_clearlydefined_true_mapping_status(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
expected = ( | expected = ( | ||||
True, | MappingStatus.MAPPED, | ||||
[ | [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.CONTENT, | type=MetadataTargetType.CONTENT, | ||||
target=parse_swhid( | target=parse_swhid( | ||||
"swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | "swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | ||||
), | ), | ||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
authority=AUTHORITY, | authority=AUTHORITY, | ||||
Show All 37 Lines | assert ( | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
) | ) | ||||
== expected | == expected | ||||
) | ) | ||||
def test_sha1git_not_in_revision(swh_storage, datadir): | def test_sha1git_not_in_revision(swh_storage, datadir): | ||||
add_revision_data(swh_storage) | add_revision_data(swh_storage) | ||||
expected = MappingStatus.UNMAPPED, [] | |||||
assert ( | assert ( | ||||
map_definition( | map_definition( | ||||
metadata_string=file_data( | metadata_string=file_data( | ||||
os.path.join(datadir, "definitions_not_mapped_sha1_git.json") | os.path.join(datadir, "definitions_not_mapped_sha1_git.json") | ||||
), | ), | ||||
storage=swh_storage, | storage=swh_storage, | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
) | ) | ||||
is None | == expected | ||||
) | ) | ||||
def test_sha1_not_in_content(swh_storage, datadir): | def test_sha1_not_in_content(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
expected = MappingStatus.IGNORE, [] | |||||
assert ( | assert ( | ||||
map_definition( | map_definition( | ||||
metadata_string=file_data( | metadata_string=file_data( | ||||
os.path.join(datadir, "definitions_not_mapped.json") | os.path.join(datadir, "definitions_not_mapped.json") | ||||
), | ), | ||||
storage=swh_storage, | storage=swh_storage, | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
) | ) | ||||
is None | == expected | ||||
) | ) | ||||
def test_map_definition_with_wrong_metadata(swh_storage, datadir): | def test_map_definition_with_data_to_be_ignored(swh_storage, datadir): | ||||
with pytest.raises(WrongMetadata): | add_content_data(swh_storage) | ||||
expected = MappingStatus.IGNORE, [] | |||||
assert ( | |||||
map_definition( | map_definition( | ||||
metadata_string=file_data(os.path.join(datadir, "licensee.json")), | metadata_string=file_data(os.path.join(datadir, "licensee.json")), | ||||
storage=swh_storage, | storage=swh_storage, | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
) | ) | ||||
== expected | |||||
) | |||||
def test_map_row_with_invalid_ID(swh_storage): | def test_map_row_with_invalid_ID(swh_storage): | ||||
with pytest.raises(InvalidComponents): | with pytest.raises(InvalidComponents): | ||||
map_row( | map_row( | ||||
storage=swh_storage, | storage=swh_storage, | ||||
id="maven/mavencentral/cobol-parser/abc/revision/def/0.4.0.json", | id="maven/mavencentral/cobol-parser/abc/revision/def/0.4.0.json", | ||||
metadata=gzip.compress(" ".encode()), | metadata=gzip.compress(" ".encode()), | ||||
▲ Show 20 Lines • Show All 61 Lines • Show Last 20 Lines |