Changeset View
Changeset View
Standalone View
Standalone View
swh/clearlydefined/tests/test_mapping_utils.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from swh.clearlydefined.mapping_utils import map_sha1_with_swhid | from datetime import datetime, timezone | ||||
from swh.clearlydefined.mapping_utils import map_row | import gzip | ||||
from swh.clearlydefined.mapping_utils import map_definition | import json | ||||
import os | |||||
import attr | |||||
import pytest | |||||
from swh.clearlydefined.error import ( | from swh.clearlydefined.error import ( | ||||
InvalidComponents, | InvalidComponents, | ||||
WrongMetadata, | |||||
NoJsonExtension, | NoJsonExtension, | ||||
RevisionNotFound, | RevisionNotFound, | ||||
ToolNotSupported, | |||||
ToolNotFound, | ToolNotFound, | ||||
ToolNotSupported, | |||||
WrongMetadata, | |||||
) | |||||
from swh.clearlydefined.mapping_utils import ( | |||||
authority, | |||||
fetcher, | |||||
map_definition, | |||||
map_row, | |||||
map_sha1_with_swhid, | |||||
) | ) | ||||
from swh.model import from_disk | from swh.model import from_disk | ||||
from swh.model.hashutil import hash_to_bytes | |||||
from swh.model.identifiers import parse_swhid | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataTargetType, | |||||
Origin, | |||||
Content, | Content, | ||||
Revision, | |||||
Person, | |||||
TimestampWithTimezone, | |||||
Timestamp, | |||||
RevisionType, | |||||
Directory, | Directory, | ||||
DirectoryEntry, | DirectoryEntry, | ||||
MetadataTargetType, | |||||
Person, | |||||
RawExtrinsicMetadata, | |||||
Revision, | |||||
RevisionType, | |||||
Timestamp, | |||||
TimestampWithTimezone, | |||||
) | ) | ||||
from swh.model.hashutil import hash_to_bytes | |||||
import gzip | |||||
import os | |||||
from typing import Tuple | |||||
import pytest | |||||
content_data = [ | content_data = [ | ||||
Content.from_data(b"42\n"), | Content.from_data(b"42\n"), | ||||
Content.from_data(b"4242\n"), | Content.from_data(b"4242\n"), | ||||
] | ] | ||||
directory = Directory( | directory = Directory( | ||||
id=hash_to_bytes("5256e856a0a0898966d6ba14feb4388b8b82d302"), | id=hash_to_bytes("5256e856a0a0898966d6ba14feb4388b8b82d302"), | ||||
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines | Revision( | ||||
directory=directory.id, | directory=directory.id, | ||||
metadata=None, | metadata=None, | ||||
extra_headers=(), | extra_headers=(), | ||||
synthetic=False, | synthetic=False, | ||||
), | ), | ||||
] | ] | ||||
def make_row(id: str, text: str) -> Tuple[str, bytes]: | |||||
""" | |||||
Take id and text as input and return a | |||||
row like a row present in | |||||
clearcode toolkit database | |||||
""" | |||||
row = (id, gzip.compress(text.encode())) | |||||
return row | |||||
def file_data(file_name): | def file_data(file_name): | ||||
with open(file_name) as file: | with open(file_name) as file: | ||||
data = file.read() | data = file.read() | ||||
return data | return data | ||||
def add_content_data(swh_storage): | def add_content_data(swh_storage): | ||||
swh_storage.content_add(content_data) | swh_storage.content_add(content_data) | ||||
Show All 20 Lines | |||||
def test_mapping_with_wrong_sha1(swh_storage): | def test_mapping_with_wrong_sha1(swh_storage): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
sha1 = "6ac599151a7aaa8ca5d38dc5bb61b49193a3cadc1ed33de5a57e4d1ecc53c846" | sha1 = "6ac599151a7aaa8ca5d38dc5bb61b49193a3cadc1ed33de5a57e4d1ecc53c846" | ||||
assert map_sha1_with_swhid(sha1=sha1, storage=swh_storage) is None | assert map_sha1_with_swhid(sha1=sha1, storage=swh_storage) is None | ||||
def test_map_row_for_definitions_with_sha1(swh_storage, datadir): | def test_map_row_for_definitions_with_sha1(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
row = make_row( | |||||
id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.json", | |||||
text=file_data(os.path.join(datadir, "definitions.json")), | |||||
) | |||||
expected = ( | expected = ( | ||||
True, | True, | ||||
[ | [ | ||||
( | RawExtrinsicMetadata( | ||||
"swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd", | type=MetadataTargetType.CONTENT, | ||||
MetadataTargetType.CONTENT, | target=parse_swhid( | ||||
Origin( | "swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | ||||
url="http://central.maven.org/maven2/za/co/absa/cobrix/" | ), | ||||
"cobol-parser/0.4.0/cobol-parser-0.4.0-sources.jar" | discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
authority=attr.evolve(authority, metadata=None), | |||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-definition-json", | |||||
origin="http://central.maven.org/maven2/za/co/absa/cobrix/cobol-parser/" | |||||
"0.4.0/cobol-parser-0.4.0-sources.jar", | |||||
metadata=json.dumps( | |||||
json.loads(file_data(os.path.join(datadir, "definitions.json"))) | |||||
).encode("utf-8"), | |||||
), | ), | ||||
) | |||||
], | ], | ||||
) | ) | ||||
assert map_row(storage=swh_storage, row=row) == expected | assert ( | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.json", | |||||
metadata=gzip.compress( | |||||
file_data(os.path.join(datadir, "definitions.json")).encode() | |||||
), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
== expected | |||||
) | |||||
def test_map_row_for_definitions_with_gitsha1(swh_storage, datadir): | def test_map_row_for_definitions_with_gitsha1(swh_storage, datadir): | ||||
add_revision_data(swh_storage) | add_revision_data(swh_storage) | ||||
row = make_row( | |||||
id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.json", | |||||
text=file_data(os.path.join(datadir, "definitions_sha1git.json")), | |||||
) | |||||
expected = ( | expected = ( | ||||
True, | True, | ||||
[ | [ | ||||
( | RawExtrinsicMetadata( | ||||
"swh:1:rev:4c66129b968ab8122964823d1d77677f50884cf6", | type=MetadataTargetType.REVISION, | ||||
MetadataTargetType.REVISION, | target=parse_swhid( | ||||
Origin( | "swh:1:rev:4c66129b968ab8122964823d1d77677f50884cf6" | ||||
url="http://central.maven.org/maven2/za/co/absa/cobrix/" | ), | ||||
"cobol-parser/0.4.0/cobol-parser-0.4.0-sources.jar" | discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
), | authority=attr.evolve(authority, metadata=None), | ||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-definition-json", | |||||
origin="http://central.maven.org/maven2/za/co/absa/cobrix/cobol-parser/" | |||||
"0.4.0/cobol-parser-0.4.0-sources.jar", | |||||
metadata=json.dumps( | |||||
json.loads( | |||||
file_data(os.path.join(datadir, "definitions_sha1git.json")) | |||||
) | ) | ||||
).encode("utf-8"), | |||||
), | |||||
], | ], | ||||
) | ) | ||||
assert map_row(storage=swh_storage, row=row) == expected | assert ( | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.json", | |||||
metadata=gzip.compress( | |||||
file_data(os.path.join(datadir, "definitions_sha1git.json")).encode() | |||||
), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
== expected | |||||
) | |||||
def test_map_row_for_scancode(swh_storage, datadir): | def test_map_row_for_scancode(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
row = make_row( | |||||
id="npm/npmjs/@ngtools/webpack/revision/10.2.1/tool/scancode/3.2.2.json", | |||||
text=file_data(os.path.join(datadir, "scancode.json")), | |||||
) | |||||
expected = ( | expected = ( | ||||
False, | False, | ||||
[ | [ | ||||
( | RawExtrinsicMetadata( | ||||
"swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd", | type=MetadataTargetType.CONTENT, | ||||
MetadataTargetType.CONTENT, | target=parse_swhid( | ||||
None, | "swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | ||||
), | |||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
authority=attr.evolve(authority, metadata=None), | |||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-harvest-scancode-json", | |||||
origin=None, | |||||
metadata=json.dumps( | |||||
json.loads( | |||||
file_data(os.path.join(datadir, "scancode_metadata.json")) | |||||
) | ) | ||||
).encode("utf-8"), | |||||
), | |||||
], | ], | ||||
) | ) | ||||
assert map_row(storage=swh_storage, row=row) == expected | assert ( | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="npm/npmjs/@ngtools/webpack/revision/10.2.1/tool/scancode/3.2.2.json", | |||||
metadata=gzip.compress( | |||||
file_data(os.path.join(datadir, "scancode.json")).encode() | |||||
), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
== expected | |||||
) | |||||
def test_map_row_for_scancode_true_mapping_status(swh_storage, datadir): | def test_map_row_for_scancode_true_mapping_status(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
row = make_row( | |||||
id="npm/npmjs/@ngtools/webpack/revision/10.2.1/tool/scancode/3.2.2.json", | |||||
text=file_data(os.path.join(datadir, "scancode_true.json")), | |||||
) | |||||
expected = ( | expected = ( | ||||
True, | True, | ||||
[ | [ | ||||
( | RawExtrinsicMetadata( | ||||
"swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd", | type=MetadataTargetType.CONTENT, | ||||
MetadataTargetType.CONTENT, | target=parse_swhid( | ||||
None, | "swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | ||||
), | |||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
authority=attr.evolve(authority, metadata=None), | |||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-harvest-scancode-json", | |||||
origin=None, | |||||
metadata=json.dumps( | |||||
json.loads( | |||||
file_data(os.path.join(datadir, "scancode_metadata.json")) | |||||
) | ) | ||||
).encode("utf-8"), | |||||
), | |||||
], | ], | ||||
) | ) | ||||
assert map_row(storage=swh_storage, row=row) == expected | assert ( | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="npm/npmjs/@ngtools/webpack/revision/10.2.1/tool/scancode/3.2.2.json", | |||||
metadata=gzip.compress( | |||||
file_data(os.path.join(datadir, "scancode_true.json")).encode() | |||||
), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
== expected | |||||
) | |||||
def test_map_row_for_licensee(swh_storage, datadir): | def test_map_row_for_licensee(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
row = make_row( | |||||
id="npm/npmjs/@fluidframework/replay-driver/revision/0.31.0/tool/licensee/\ | |||||
9.13.0.json", | |||||
text=file_data(os.path.join(datadir, "licensee.json")), | |||||
) | |||||
expected = ( | expected = ( | ||||
False, | False, | ||||
[ | [ | ||||
( | RawExtrinsicMetadata( | ||||
"swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa", | type=MetadataTargetType.CONTENT, | ||||
MetadataTargetType.CONTENT, | target=parse_swhid( | ||||
None, | "swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | ||||
), | |||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
authority=attr.evolve(authority, metadata=None), | |||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-harvest-licensee-json", | |||||
origin=None, | |||||
metadata=json.dumps( | |||||
json.loads( | |||||
file_data(os.path.join(datadir, "licensee_metadata.json")) | |||||
) | ) | ||||
).encode("utf-8"), | |||||
), | |||||
], | ], | ||||
) | ) | ||||
assert map_row(storage=swh_storage, row=row) == expected | assert ( | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="npm/npmjs/@fluidframework/replay-driver/revision/0.31.0/tool/licensee/" | |||||
"9.13.0.json", | |||||
metadata=gzip.compress( | |||||
file_data(os.path.join(datadir, "licensee.json")).encode() | |||||
), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
== expected | |||||
) | |||||
def test_map_row_for_licensee_true_mapping_status(swh_storage, datadir): | def test_map_row_for_licensee_true_mapping_status(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
row = make_row( | |||||
id="npm/npmjs/@fluidframework/replay-driver/revision/0.31.0/tool/licensee/\ | |||||
9.13.0.json", | |||||
text=file_data(os.path.join(datadir, "licensee_true.json")), | |||||
) | |||||
expected = ( | expected = ( | ||||
True, | True, | ||||
[ | [ | ||||
( | RawExtrinsicMetadata( | ||||
"swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa", | type=MetadataTargetType.CONTENT, | ||||
MetadataTargetType.CONTENT, | target=parse_swhid( | ||||
None, | "swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | ||||
), | |||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
authority=attr.evolve(authority, metadata=None), | |||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-harvest-licensee-json", | |||||
origin=None, | |||||
metadata=json.dumps( | |||||
json.loads( | |||||
file_data(os.path.join(datadir, "licensee_metadata.json")) | |||||
) | ) | ||||
).encode("utf-8"), | |||||
), | |||||
], | ], | ||||
) | ) | ||||
assert map_row(storage=swh_storage, row=row) == expected | assert ( | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="npm/npmjs/@fluidframework/replay-driver/revision/0.31.0/tool/licensee/" | |||||
"9.13.0.json", | |||||
metadata=gzip.compress( | |||||
file_data(os.path.join(datadir, "licensee_true.json")).encode() | |||||
), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
== expected | |||||
) | |||||
def test_map_row_for_clearlydefined(swh_storage, datadir): | def test_map_row_for_clearlydefined(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
row = make_row( | |||||
id="npm/npmjs/@pixi/mesh-extras/revision/5.3.5/tool/clearlydefined/1.3.4.json", | |||||
text=file_data(os.path.join(datadir, "clearlydefined.json")), | |||||
) | |||||
expected = ( | expected = ( | ||||
False, | False, | ||||
[ | [ | ||||
( | RawExtrinsicMetadata( | ||||
"swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa", | type=MetadataTargetType.CONTENT, | ||||
MetadataTargetType.CONTENT, | target=parse_swhid( | ||||
None, | "swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | ||||
), | |||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
authority=attr.evolve(authority, metadata=None), | |||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-harvest-clearlydefined-json", | |||||
origin=None, | |||||
metadata=json.dumps( | |||||
json.loads( | |||||
file_data(os.path.join(datadir, "clearlydefined_metadata.json")) | |||||
) | |||||
).encode("utf-8"), | |||||
), | |||||
RawExtrinsicMetadata( | |||||
type=MetadataTargetType.CONTENT, | |||||
target=parse_swhid( | |||||
"swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | |||||
), | |||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
authority=attr.evolve(authority, metadata=None), | |||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-harvest-clearlydefined-json", | |||||
origin=None, | |||||
metadata=json.dumps( | |||||
json.loads( | |||||
file_data( | |||||
os.path.join(datadir, "clearlydefined_metadata_2.json") | |||||
) | |||||
) | ) | ||||
).encode("utf-8"), | |||||
), | |||||
], | ], | ||||
) | ) | ||||
map_row(storage=swh_storage, row=row) == expected | assert ( | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="npm/npmjs/@pixi/mesh-extras/revision/5.3.5/tool/clearlydefined/" | |||||
"1.3.4.json", | |||||
metadata=gzip.compress( | |||||
file_data(os.path.join(datadir, "clearlydefined.json")).encode() | |||||
), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
== expected | |||||
) | |||||
def test_map_row_for_clearlydefined_true_mapping_status(swh_storage, datadir): | def test_map_row_for_clearlydefined_true_mapping_status(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
row = make_row( | |||||
id="npm/npmjs/@pixi/mesh-extras/revision/5.3.5/tool/clearlydefined/1.3.4.json", | |||||
text=file_data(os.path.join(datadir, "clearlydefined_true.json")), | |||||
) | |||||
expected = ( | expected = ( | ||||
True, | True, | ||||
[ | [ | ||||
( | RawExtrinsicMetadata( | ||||
"swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa", | type=MetadataTargetType.CONTENT, | ||||
MetadataTargetType.CONTENT, | target=parse_swhid( | ||||
None, | "swh:1:cnt:36fade77193cb6d2bd826161a0979d64c28ab4fa" | ||||
), | |||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
authority=attr.evolve(authority, metadata=None), | |||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-harvest-clearlydefined-json", | |||||
origin=None, | |||||
metadata=json.dumps( | |||||
json.loads( | |||||
file_data(os.path.join(datadir, "clearlydefined_metadata.json")) | |||||
) | |||||
).encode("utf-8"), | |||||
), | |||||
RawExtrinsicMetadata( | |||||
type=MetadataTargetType.CONTENT, | |||||
target=parse_swhid( | |||||
"swh:1:cnt:d81cc0710eb6cf9efd5b920a8453e1e07157b6cd" | |||||
), | |||||
discovery_date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
authority=attr.evolve(authority, metadata=None), | |||||
fetcher=attr.evolve(fetcher, metadata=None), | |||||
format="clearlydefined-harvest-clearlydefined-json", | |||||
origin=None, | |||||
metadata=json.dumps( | |||||
json.loads( | |||||
file_data( | |||||
os.path.join(datadir, "clearlydefined_metadata_2.json") | |||||
) | ) | ||||
) | |||||
).encode("utf-8"), | |||||
), | |||||
], | ], | ||||
) | ) | ||||
map_row(storage=swh_storage, row=row) == expected | assert ( | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="npm/npmjs/@pixi/mesh-extras/revision/5.3.5/tool/clearlydefined/" | |||||
"1.3.4.json", | |||||
metadata=gzip.compress( | |||||
file_data(os.path.join(datadir, "clearlydefined_true.json")).encode() | |||||
), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
== expected | |||||
) | |||||
def test_sha1git_not_in_revision(swh_storage, datadir): | def test_sha1git_not_in_revision(swh_storage, datadir): | ||||
add_revision_data(swh_storage) | add_revision_data(swh_storage) | ||||
assert ( | assert ( | ||||
map_definition( | map_definition( | ||||
metadata_string=file_data( | metadata_string=file_data( | ||||
os.path.join(datadir, "definitions_not_mapped_sha1_git.json") | os.path.join(datadir, "definitions_not_mapped_sha1_git.json") | ||||
), | ), | ||||
storage=swh_storage, | storage=swh_storage, | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | ) | ||||
is None | is None | ||||
) | ) | ||||
def test_sha1_not_in_content(swh_storage, datadir): | def test_sha1_not_in_content(swh_storage, datadir): | ||||
add_content_data(swh_storage) | add_content_data(swh_storage) | ||||
assert ( | assert ( | ||||
map_definition( | map_definition( | ||||
metadata_string=file_data( | metadata_string=file_data( | ||||
os.path.join(datadir, "definitions_not_mapped.json") | os.path.join(datadir, "definitions_not_mapped.json") | ||||
), | ), | ||||
storage=swh_storage, | storage=swh_storage, | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | ) | ||||
is None | is None | ||||
) | ) | ||||
def test_map_definition_with_wrong_metadata(swh_storage, datadir): | def test_map_definition_with_wrong_metadata(swh_storage, datadir): | ||||
with pytest.raises(WrongMetadata): | with pytest.raises(WrongMetadata): | ||||
map_definition( | map_definition( | ||||
metadata_string=file_data(os.path.join(datadir, "licensee.json")), | metadata_string=file_data(os.path.join(datadir, "licensee.json")), | ||||
storage=swh_storage, | storage=swh_storage, | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | ) | ||||
def test_map_row_with_invalid_ID(swh_storage): | def test_map_row_with_invalid_ID(swh_storage): | ||||
row = make_row( | |||||
id="maven/mavencentral/cobol-parser/abc/revision/def/0.4.0.json", text="abc" | |||||
) | |||||
with pytest.raises(InvalidComponents): | with pytest.raises(InvalidComponents): | ||||
map_row(storage=swh_storage, row=row) | map_row( | ||||
storage=swh_storage, | |||||
id="maven/mavencentral/cobol-parser/abc/revision/def/0.4.0.json", | |||||
metadata=gzip.compress(" ".encode()), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
def test_map_row_with_empty_metadata_string(swh_storage): | def test_map_row_with_empty_metadata_string(swh_storage): | ||||
row = make_row( | map_row( | ||||
storage=swh_storage, | |||||
id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.json", | id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.json", | ||||
text="", | metadata=gzip.compress("".encode()), | ||||
) | date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | ||||
assert map_row(storage=swh_storage, row=row) is None | ) is None | ||||
def test_map_row_with_invalid_ID_without_revision(swh_storage): | def test_map_row_with_invalid_ID_without_revision(swh_storage): | ||||
row = make_row( | with pytest.raises(RevisionNotFound): | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/abc/0.4.0.json", | id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/abc/0.4.0.json", | ||||
text="abc", | metadata=gzip.compress("".encode()), | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | ) | ||||
with pytest.raises(RevisionNotFound): | |||||
map_row(storage=swh_storage, row=row) | |||||
def test_map_row_with_invalid_ID_without_json_extension(swh_storage): | def test_map_row_with_invalid_ID_without_json_extension(swh_storage): | ||||
row = make_row( | with pytest.raises(NoJsonExtension): | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.txt", | id="maven/mavencentral/za.co.absa.cobrix/cobol-parser/revision/0.4.0.txt", | ||||
text="abc", | metadata=gzip.compress("".encode()), | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | ) | ||||
with pytest.raises(NoJsonExtension): | |||||
map_row(storage=swh_storage, row=row) | |||||
def test_map_row_with_invalid_ID_without_6_or_9_length(swh_storage): | def test_map_row_with_invalid_ID_without_6_or_9_length(swh_storage): | ||||
row = make_row( | |||||
id="npm/npmjs/@ngtools/webpack/revision/10.2.1/tool/3.2.2.json", text="abc" | |||||
) | |||||
with pytest.raises(InvalidComponents): | with pytest.raises(InvalidComponents): | ||||
map_row(storage=swh_storage, row=row) | map_row( | ||||
storage=swh_storage, | |||||
id="npm/npmjs/@ngtools/webpack/revision/10.2.1/tool/3.2.2.json", | |||||
metadata=gzip.compress("".encode()), | |||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | |||||
def test_map_row_with_invalid_tool(swh_storage): | def test_map_row_with_invalid_tool(swh_storage): | ||||
row = make_row( | with pytest.raises(ToolNotSupported): | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="npm/npmjs/@ngtools/webpack/revision/10.2.1/tool/abc/3.2.2.json", | id="npm/npmjs/@ngtools/webpack/revision/10.2.1/tool/abc/3.2.2.json", | ||||
text="abc", | metadata=gzip.compress("".encode()), | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | ) | ||||
with pytest.raises(ToolNotSupported): | |||||
map_row(storage=swh_storage, row=row) | |||||
def test_map_row_with_invalid_harvest_ID(swh_storage): | def test_map_row_with_invalid_harvest_ID(swh_storage): | ||||
row = make_row( | with pytest.raises(ToolNotFound): | ||||
map_row( | |||||
storage=swh_storage, | |||||
id="npm/npmjs/@ngtools/webpack/revision/10.2.1/abc/scancode/3.2.2.json", | id="npm/npmjs/@ngtools/webpack/revision/10.2.1/abc/scancode/3.2.2.json", | ||||
text="abc", | metadata=gzip.compress("".encode()), | ||||
date=datetime(year=2021, month=2, day=6, tzinfo=timezone.utc), | |||||
) | ) | ||||
with pytest.raises(ToolNotFound): | |||||
map_row(storage=swh_storage, row=row) |