Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_metadata.py
# Copyright (C) 2017-2020 The Software Heritage developers | # Copyright (C) 2017-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import unittest | import logging | ||||
from hypothesis import HealthCheck, given, settings, strategies | from hypothesis import HealthCheck, given, settings, strategies | ||||
import pytest | |||||
from swh.indexer.codemeta import CODEMETA_TERMS | from swh.indexer.codemeta import CODEMETA_TERMS | ||||
from swh.indexer.metadata import ContentMetadataIndexer, RevisionMetadataIndexer | from swh.indexer.metadata import ContentMetadataIndexer, DirectoryMetadataIndexer | ||||
from swh.indexer.metadata_detector import detect_metadata | from swh.indexer.metadata_detector import detect_metadata | ||||
from swh.indexer.metadata_dictionary import MAPPINGS | from swh.indexer.metadata_dictionary import MAPPINGS | ||||
from swh.indexer.metadata_dictionary.maven import MavenMapping | from swh.indexer.metadata_dictionary.maven import MavenMapping | ||||
from swh.indexer.metadata_dictionary.npm import NpmMapping | from swh.indexer.metadata_dictionary.npm import NpmMapping | ||||
from swh.indexer.metadata_dictionary.ruby import GemspecMapping | from swh.indexer.metadata_dictionary.ruby import GemspecMapping | ||||
from swh.indexer.storage.model import ContentMetadataRow, RevisionIntrinsicMetadataRow | from swh.indexer.storage.model import ContentMetadataRow, DirectoryIntrinsicMetadataRow | ||||
from swh.indexer.tests.utils import DIRECTORY2, REVISION | from swh.indexer.tests.utils import DIRECTORY2 | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Directory, DirectoryEntry, Revision | from swh.model.model import Directory, DirectoryEntry | ||||
from .utils import ( | from .utils import ( | ||||
BASE_TEST_CONFIG, | BASE_TEST_CONFIG, | ||||
YARN_PARSER_METADATA, | YARN_PARSER_METADATA, | ||||
fill_obj_storage, | fill_obj_storage, | ||||
fill_storage, | fill_storage, | ||||
json_document_strategy, | json_document_strategy, | ||||
xml_document_strategy, | xml_document_strategy, | ||||
) | ) | ||||
TRANSLATOR_TOOL = { | TRANSLATOR_TOOL = { | ||||
"name": "swh-metadata-translator", | "name": "swh-metadata-translator", | ||||
"version": "0.0.2", | "version": "0.0.2", | ||||
"configuration": {"type": "local", "context": "NpmMapping"}, | "configuration": {"type": "local", "context": "NpmMapping"}, | ||||
} | } | ||||
class ContentMetadataTestIndexer(ContentMetadataIndexer): | class ContentMetadataTestIndexer(ContentMetadataIndexer): | ||||
"""Specific Metadata whose configuration is enough to satisfy the | """Specific Metadata whose configuration is enough to satisfy the | ||||
indexing tests. | indexing tests. | ||||
""" | """ | ||||
def parse_config_file(self, *args, **kwargs): | def parse_config_file(self, *args, **kwargs): | ||||
assert False, "should not be called; the rev indexer configures it." | assert False, "should not be called; the dir indexer configures it." | ||||
REVISION_METADATA_CONFIG = { | DIRECTORY_METADATA_CONFIG = { | ||||
**BASE_TEST_CONFIG, | **BASE_TEST_CONFIG, | ||||
"tools": TRANSLATOR_TOOL, | "tools": TRANSLATOR_TOOL, | ||||
} | } | ||||
class Metadata(unittest.TestCase): | class TestMetadata: | ||||
""" | """ | ||||
Tests metadata_mock_tool tool for Metadata detection | Tests metadata_mock_tool tool for Metadata detection | ||||
""" | """ | ||||
def setUp(self): | def setup_method(self): | ||||
""" | |||||
shows the entire diff in the results | |||||
""" | |||||
self.maxDiff = None | |||||
self.npm_mapping = MAPPINGS["NpmMapping"]() | self.npm_mapping = MAPPINGS["NpmMapping"]() | ||||
self.codemeta_mapping = MAPPINGS["CodemetaMapping"]() | self.codemeta_mapping = MAPPINGS["CodemetaMapping"]() | ||||
self.maven_mapping = MAPPINGS["MavenMapping"]() | self.maven_mapping = MAPPINGS["MavenMapping"]() | ||||
self.pkginfo_mapping = MAPPINGS["PythonPkginfoMapping"]() | self.pkginfo_mapping = MAPPINGS["PythonPkginfoMapping"]() | ||||
self.gemspec_mapping = MAPPINGS["GemspecMapping"]() | self.gemspec_mapping = MAPPINGS["GemspecMapping"]() | ||||
self.cff_mapping = MAPPINGS["CffMapping"]() | self.cff_mapping = MAPPINGS["CffMapping"]() | ||||
def test_compute_metadata_none(self): | def test_compute_metadata_none(self): | ||||
""" | """ | ||||
testing content empty content is empty | testing content empty content is empty | ||||
should return None | should return None | ||||
""" | """ | ||||
# given | # given | ||||
content = b"" | content = b"" | ||||
# None if no metadata was found or an error occurred | # None if no metadata was found or an error occurred | ||||
declared_metadata = None | declared_metadata = None | ||||
# when | # when | ||||
result = self.npm_mapping.translate(content) | result = self.npm_mapping.translate(content) | ||||
# then | # then | ||||
self.assertEqual(declared_metadata, result) | assert declared_metadata == result | ||||
def test_compute_metadata_cff(self): | def test_compute_metadata_cff(self): | ||||
""" | """ | ||||
testing CITATION.cff translation | testing CITATION.cff translation | ||||
""" | """ | ||||
# given | # given | ||||
content = """# YAML 1.2 | content = """# YAML 1.2 | ||||
--- | --- | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | RIS, schema.org, CodeMeta, and .zenodo.json.""", | ||||
"keywords": ["citation", "bibliography", "cff", "CITATION.cff"], | "keywords": ["citation", "bibliography", "cff", "CITATION.cff"], | ||||
"license": "https://spdx.org/licenses/Apache-2.0", | "license": "https://spdx.org/licenses/Apache-2.0", | ||||
"version": "1.4.0-alpha0", | "version": "1.4.0-alpha0", | ||||
} | } | ||||
# when | # when | ||||
result = self.cff_mapping.translate(content) | result = self.cff_mapping.translate(content) | ||||
# then | # then | ||||
self.assertEqual(expected, result) | assert expected == result | ||||
def test_compute_metadata_npm(self): | def test_compute_metadata_npm(self): | ||||
""" | """ | ||||
testing only computation of metadata with hard_mapping_npm | testing only computation of metadata with hard_mapping_npm | ||||
""" | """ | ||||
# given | # given | ||||
content = b""" | content = b""" | ||||
{ | { | ||||
"name": "test_metadata", | "name": "test_metadata", | ||||
"version": "0.0.2", | "version": "0.0.2", | ||||
"description": "Simple package.json test for indexer", | "description": "Simple package.json test for indexer", | ||||
"repository": { | "repository": { | ||||
"type": "git", | "type": "git", | ||||
"url": "https://github.com/moranegg/metadata_test" | "url": "https://github.com/moranegg/metadata_test" | ||||
}, | }, | ||||
vlorentz: you can simplify this example, to only keep what matters to the test. It makes the test more… | |||||
[Done] [Inline Actions] VickyMerzOwn: ok, I'll take the unnecessary part out. Would you like me to add a comment describing what is wrong in content? | |||||
[Done] [Inline Actions] vlorentz: yes, please | |||||
"author": { | "author": { | ||||
"email": "moranegg@example.com", | "email": "moranegg@example.com", | ||||
"name": "Morane G" | "name": "Morane G" | ||||
} | } | ||||
} | } | ||||
""" | """ | ||||
declared_metadata = { | declared_metadata = { | ||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
Show All 9 Lines | def test_compute_metadata_npm(self): | ||||
"email": "moranegg@example.com", | "email": "moranegg@example.com", | ||||
} | } | ||||
], | ], | ||||
} | } | ||||
# when | # when | ||||
result = self.npm_mapping.translate(content) | result = self.npm_mapping.translate(content) | ||||
# then | # then | ||||
self.assertEqual(declared_metadata, result) | assert declared_metadata == result | ||||
def test_index_content_metadata_npm(self): | def test_index_content_metadata_npm(self): | ||||
""" | """ | ||||
testing NPM with package.json | testing NPM with package.json | ||||
- one sha1 uses a file that can't be translated to metadata and | - one sha1 uses a file that can't be translated to metadata and | ||||
should return None in the translated metadata | should return None in the translated metadata | ||||
""" | """ | ||||
# given | # given | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | def test_index_content_metadata_npm(self): | ||||
}, | }, | ||||
), | ), | ||||
] | ] | ||||
for result in results: | for result in results: | ||||
del result.tool["id"] | del result.tool["id"] | ||||
# The assertion below returns False sometimes because of nested lists | # The assertion below returns False sometimes because of nested lists | ||||
self.assertEqual(expected_results, results) | assert expected_results == results | ||||
def test_npm_bugs_normalization(self): | def test_npm_bugs_normalization(self): | ||||
# valid dictionary | # valid dictionary | ||||
package_json = b"""{ | package_json = b"""{ | ||||
"name": "foo", | "name": "foo", | ||||
"bugs": { | "bugs": { | ||||
"url": "https://github.com/owner/project/issues", | "url": "https://github.com/owner/project/issues", | ||||
"email": "foo@example.com" | "email": "foo@example.com" | ||||
} | } | ||||
}""" | }""" | ||||
result = self.npm_mapping.translate(package_json) | result = self.npm_mapping.translate(package_json) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"name": "foo", | "name": "foo", | ||||
"issueTracker": "https://github.com/owner/project/issues", | "issueTracker": "https://github.com/owner/project/issues", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
# "invalid" dictionary | # "invalid" dictionary | ||||
package_json = b"""{ | package_json = b"""{ | ||||
"name": "foo", | "name": "foo", | ||||
"bugs": { | "bugs": { | ||||
"email": "foo@example.com" | "email": "foo@example.com" | ||||
} | } | ||||
}""" | }""" | ||||
result = self.npm_mapping.translate(package_json) | result = self.npm_mapping.translate(package_json) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"name": "foo", | "name": "foo", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
# string | # string | ||||
package_json = b"""{ | package_json = b"""{ | ||||
"name": "foo", | "name": "foo", | ||||
"bugs": "https://github.com/owner/project/issues" | "bugs": "https://github.com/owner/project/issues" | ||||
}""" | }""" | ||||
result = self.npm_mapping.translate(package_json) | result = self.npm_mapping.translate(package_json) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"name": "foo", | "name": "foo", | ||||
"issueTracker": "https://github.com/owner/project/issues", | "issueTracker": "https://github.com/owner/project/issues", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
def test_npm_repository_normalization(self): | def test_npm_repository_normalization(self): | ||||
# normal | # normal | ||||
package_json = b"""{ | package_json = b"""{ | ||||
"name": "foo", | "name": "foo", | ||||
"repository": { | "repository": { | ||||
"type" : "git", | "type" : "git", | ||||
"url" : "https://github.com/npm/cli.git" | "url" : "https://github.com/npm/cli.git" | ||||
} | } | ||||
}""" | }""" | ||||
result = self.npm_mapping.translate(package_json) | result = self.npm_mapping.translate(package_json) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"name": "foo", | "name": "foo", | ||||
"codeRepository": "git+https://github.com/npm/cli.git", | "codeRepository": "git+https://github.com/npm/cli.git", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
# missing url | # missing url | ||||
package_json = b"""{ | package_json = b"""{ | ||||
"name": "foo", | "name": "foo", | ||||
"repository": { | "repository": { | ||||
"type" : "git" | "type" : "git" | ||||
} | } | ||||
}""" | }""" | ||||
result = self.npm_mapping.translate(package_json) | result = self.npm_mapping.translate(package_json) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"name": "foo", | "name": "foo", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
# github shortcut | # github shortcut | ||||
package_json = b"""{ | package_json = b"""{ | ||||
"name": "foo", | "name": "foo", | ||||
"repository": "github:npm/cli" | "repository": "github:npm/cli" | ||||
}""" | }""" | ||||
result = self.npm_mapping.translate(package_json) | result = self.npm_mapping.translate(package_json) | ||||
expected_result = { | expected_result = { | ||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"name": "foo", | "name": "foo", | ||||
"codeRepository": "git+https://github.com/npm/cli.git", | "codeRepository": "git+https://github.com/npm/cli.git", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
} | } | ||||
self.assertEqual(result, expected_result) | assert result == expected_result | ||||
# github shortshortcut | # github shortshortcut | ||||
package_json = b"""{ | package_json = b"""{ | ||||
"name": "foo", | "name": "foo", | ||||
"repository": "npm/cli" | "repository": "npm/cli" | ||||
}""" | }""" | ||||
result = self.npm_mapping.translate(package_json) | result = self.npm_mapping.translate(package_json) | ||||
self.assertEqual(result, expected_result) | assert result == expected_result | ||||
# gitlab shortcut | # gitlab shortcut | ||||
package_json = b"""{ | package_json = b"""{ | ||||
"name": "foo", | "name": "foo", | ||||
"repository": "gitlab:user/repo" | "repository": "gitlab:user/repo" | ||||
}""" | }""" | ||||
result = self.npm_mapping.translate(package_json) | result = self.npm_mapping.translate(package_json) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"name": "foo", | "name": "foo", | ||||
"codeRepository": "git+https://gitlab.com/user/repo.git", | "codeRepository": "git+https://gitlab.com/user/repo.git", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
def test_detect_metadata_package_json(self): | |||||
filenames = [b"package.json", b"Package.json", b"PACKAGE.json", b"PACKAGE.JSON"] | |||||
for filename in filenames: | @pytest.mark.parametrize( | ||||
with self.subTest(filename=filename): | "filename", [b"package.json", b"Package.json", b"PACKAGE.json", b"PACKAGE.JSON"] | ||||
) | |||||
def test_detect_metadata_package_json(self, filename): | |||||
# given | # given | ||||
df = [ | df = [ | ||||
{ | { | ||||
"sha1_git": b"abc", | "sha1_git": b"abc", | ||||
"name": b"index.js", | "name": b"index.js", | ||||
"target": b"abc", | "target": b"abc", | ||||
"length": 897, | "length": 897, | ||||
"status": "visible", | "status": "visible", | ||||
"type": "file", | "type": "file", | ||||
"perms": 33188, | "perms": 33188, | ||||
"dir_id": b"dir_a", | "dir_id": b"dir_a", | ||||
"sha1": b"bcd", | "sha1": b"bcd", | ||||
}, | }, | ||||
{ | { | ||||
"sha1_git": b"aab", | "sha1_git": b"aab", | ||||
"name": filename, | "name": filename, | ||||
"target": b"aab", | "target": b"aab", | ||||
"length": 712, | "length": 712, | ||||
"status": "visible", | "status": "visible", | ||||
"type": "file", | "type": "file", | ||||
"perms": 33188, | "perms": 33188, | ||||
"dir_id": b"dir_a", | "dir_id": b"dir_a", | ||||
"sha1": b"cde", | "sha1": b"cde", | ||||
}, | }, | ||||
] | ] | ||||
# when | # when | ||||
results = detect_metadata(df) | results = detect_metadata(df) | ||||
expected_results = {"NpmMapping": [b"cde"]} | expected_results = {"NpmMapping": [b"cde"]} | ||||
# then | # then | ||||
self.assertEqual(expected_results, results) | assert expected_results == results | ||||
def test_detect_metadata_codemeta_json_uppercase(self): | def test_detect_metadata_codemeta_json_uppercase(self): | ||||
# given | # given | ||||
df = [ | df = [ | ||||
{ | { | ||||
"sha1_git": b"abc", | "sha1_git": b"abc", | ||||
"name": b"index.html", | "name": b"index.html", | ||||
"target": b"abc", | "target": b"abc", | ||||
Show All 16 Lines | def test_detect_metadata_codemeta_json_uppercase(self): | ||||
"sha1": b"bcd", | "sha1": b"bcd", | ||||
}, | }, | ||||
] | ] | ||||
# when | # when | ||||
results = detect_metadata(df) | results = detect_metadata(df) | ||||
expected_results = {"CodemetaMapping": [b"bcd"]} | expected_results = {"CodemetaMapping": [b"bcd"]} | ||||
# then | # then | ||||
self.assertEqual(expected_results, results) | assert expected_results == results | ||||
def test_compute_metadata_valid_codemeta(self): | def test_compute_metadata_valid_codemeta(self): | ||||
raw_content = b"""{ | raw_content = b"""{ | ||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"@type": "SoftwareSourceCode", | "@type": "SoftwareSourceCode", | ||||
"identifier": "CodeMeta", | "identifier": "CodeMeta", | ||||
"description": "CodeMeta is a concept vocabulary that can be used to standardize the exchange of software metadata across repositories and organizations.", | "description": "CodeMeta is a concept vocabulary that can be used to standardize the exchange of software metadata across repositories and organizations.", | ||||
"name": "CodeMeta: Minimal metadata schemas for science software and code, in JSON-LD", | "name": "CodeMeta: Minimal metadata schemas for science software and code, in JSON-LD", | ||||
▲ Show 20 Lines • Show All 90 Lines • ▼ Show 20 Lines | def test_compute_metadata_valid_codemeta(self): | ||||
"in Scientific Software", | "in Scientific Software", | ||||
"keywords": ["metadata", "software"], | "keywords": ["metadata", "software"], | ||||
"version": "2.0", | "version": "2.0", | ||||
"dateCreated": "2017-06-05", | "dateCreated": "2017-06-05", | ||||
"datePublished": "2017-06-05", | "datePublished": "2017-06-05", | ||||
"programmingLanguage": "JSON-LD", | "programmingLanguage": "JSON-LD", | ||||
} | } | ||||
result = self.codemeta_mapping.translate(raw_content) | result = self.codemeta_mapping.translate(raw_content) | ||||
self.assertEqual(result, expected_result) | assert result == expected_result | ||||
def test_compute_metadata_codemeta_alternate_context(self): | def test_compute_metadata_codemeta_alternate_context(self): | ||||
raw_content = b"""{ | raw_content = b"""{ | ||||
"@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", | "@context": "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld", | ||||
"@type": "SoftwareSourceCode", | "@type": "SoftwareSourceCode", | ||||
"identifier": "CodeMeta" | "identifier": "CodeMeta" | ||||
}""" # noqa | }""" # noqa | ||||
expected_result = { | expected_result = { | ||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"identifier": "CodeMeta", | "identifier": "CodeMeta", | ||||
} | } | ||||
result = self.codemeta_mapping.translate(raw_content) | result = self.codemeta_mapping.translate(raw_content) | ||||
self.assertEqual(result, expected_result) | assert result == expected_result | ||||
def test_compute_metadata_maven(self): | def test_compute_metadata_maven(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<name>Maven Default Project</name> | <name>Maven Default Project</name> | ||||
<modelVersion>4.0.0</modelVersion> | <modelVersion>4.0.0</modelVersion> | ||||
<groupId>com.mycompany.app</groupId> | <groupId>com.mycompany.app</groupId> | ||||
<artifactId>my-app</artifactId> | <artifactId>my-app</artifactId> | ||||
Show All 14 Lines | def test_compute_metadata_maven(self): | ||||
<name>Apache License, Version 2.0</name> | <name>Apache License, Version 2.0</name> | ||||
<url>https://www.apache.org/licenses/LICENSE-2.0.txt</url> | <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url> | ||||
<distribution>repo</distribution> | <distribution>repo</distribution> | ||||
<comments>A business-friendly OSS license</comments> | <comments>A business-friendly OSS license</comments> | ||||
</license> | </license> | ||||
</licenses> | </licenses> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "Maven Default Project", | "name": "Maven Default Project", | ||||
"identifier": "com.mycompany.app", | "identifier": "com.mycompany.app", | ||||
"version": "1.2.3", | "version": "1.2.3", | ||||
"license": "https://www.apache.org/licenses/LICENSE-2.0.txt", | "license": "https://www.apache.org/licenses/LICENSE-2.0.txt", | ||||
"codeRepository": ( | "codeRepository": ( | ||||
"http://repo1.maven.org/maven2/com/mycompany/app/my-app" | "http://repo1.maven.org/maven2/com/mycompany/app/my-app" | ||||
), | ), | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_maven_empty(self): | def test_compute_metadata_maven_empty(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_maven_almost_empty(self): | def test_compute_metadata_maven_almost_empty(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<foo/> | <foo/> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_maven_invalid_xml(self): | def test_compute_metadata_maven_invalid_xml(self, caplog): | ||||
expected_warning = ( | expected_warning = ( | ||||
"WARNING:swh.indexer.metadata_dictionary.maven.MavenMapping:" | "swh.indexer.metadata_dictionary.maven.MavenMapping", | ||||
"Error parsing XML from foo" | logging.WARNING, | ||||
"Error parsing XML from foo", | |||||
) | ) | ||||
caplog.at_level(logging.WARNING, logger="swh.indexer.metadata_dictionary") | |||||
raw_content = b""" | raw_content = b""" | ||||
<project>""" | <project>""" | ||||
with self.assertLogs("swh.indexer.metadata_dictionary", level="WARNING") as cm: | caplog.clear() | ||||
result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | ||||
self.assertEqual(cm.output, [expected_warning]) | assert caplog.record_tuples == [expected_warning] | ||||
self.assertEqual(result, None) | assert result is None | ||||
raw_content = b""" | raw_content = b""" | ||||
""" | """ | ||||
with self.assertLogs("swh.indexer.metadata_dictionary", level="WARNING") as cm: | caplog.clear() | ||||
result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | ||||
self.assertEqual(cm.output, [expected_warning]) | assert caplog.record_tuples == [expected_warning] | ||||
self.assertEqual(result, None) | assert result is None | ||||
def test_compute_metadata_maven_unknown_encoding(self): | def test_compute_metadata_maven_unknown_encoding(self, caplog): | ||||
expected_warning = ( | expected_warning = ( | ||||
"WARNING:swh.indexer.metadata_dictionary.maven.MavenMapping:" | "swh.indexer.metadata_dictionary.maven.MavenMapping", | ||||
"Error detecting XML encoding from foo" | logging.WARNING, | ||||
"Error detecting XML encoding from foo", | |||||
) | ) | ||||
caplog.at_level(logging.WARNING, logger="swh.indexer.metadata_dictionary") | |||||
raw_content = b"""<?xml version="1.0" encoding="foo"?> | raw_content = b"""<?xml version="1.0" encoding="foo"?> | ||||
<project> | <project> | ||||
</project>""" | </project>""" | ||||
with self.assertLogs("swh.indexer.metadata_dictionary", level="WARNING") as cm: | caplog.clear() | ||||
result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | ||||
self.assertEqual(cm.output, [expected_warning]) | assert caplog.record_tuples == [expected_warning] | ||||
self.assertEqual(result, None) | assert result is None | ||||
raw_content = b"""<?xml version="1.0" encoding="UTF-7"?> | raw_content = b"""<?xml version="1.0" encoding="UTF-7"?> | ||||
<project> | <project> | ||||
</project>""" | </project>""" | ||||
with self.assertLogs("swh.indexer.metadata_dictionary", level="WARNING") as cm: | caplog.clear() | ||||
result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | ||||
self.assertEqual(cm.output, [expected_warning]) | assert caplog.record_tuples == [expected_warning] | ||||
self.assertEqual(result, None) | assert result is None | ||||
def test_compute_metadata_maven_invalid_encoding(self): | def test_compute_metadata_maven_invalid_encoding(self, caplog): | ||||
expected_warning = [ | expected_warning = [ | ||||
# libexpat1 <= 2.2.10-2+deb11u1 | # libexpat1 <= 2.2.10-2+deb11u1 | ||||
[ | [ | ||||
( | ( | ||||
"WARNING:swh.indexer.metadata_dictionary.maven.MavenMapping:" | "swh.indexer.metadata_dictionary.maven.MavenMapping", | ||||
"Error unidecoding XML from foo" | logging.WARNING, | ||||
"Error unidecoding XML from foo", | |||||
) | ) | ||||
], | ], | ||||
# libexpat1 >= 2.2.10-2+deb11u2 | # libexpat1 >= 2.2.10-2+deb11u2 | ||||
[ | [ | ||||
( | ( | ||||
"WARNING:swh.indexer.metadata_dictionary.maven.MavenMapping:" | "swh.indexer.metadata_dictionary.maven.MavenMapping", | ||||
"Error parsing XML from foo" | logging.WARNING, | ||||
"Error parsing XML from foo", | |||||
) | ) | ||||
], | ], | ||||
] | ] | ||||
caplog.at_level(logging.WARNING, logger="swh.indexer.metadata_dictionary") | |||||
raw_content = b"""<?xml version="1.0" encoding="UTF-8"?> | raw_content = b"""<?xml version="1.0" encoding="UTF-8"?> | ||||
<foo\xe5ct> | <foo\xe5ct> | ||||
</foo>""" | </foo>""" | ||||
with self.assertLogs("swh.indexer.metadata_dictionary", level="WARNING") as cm: | caplog.clear() | ||||
result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | result = MAPPINGS["MavenMapping"]("foo").translate(raw_content) | ||||
self.assertIn(cm.output, expected_warning) | assert caplog.record_tuples in expected_warning | ||||
self.assertEqual(result, None) | assert result is None | ||||
def test_compute_metadata_maven_minimal(self): | def test_compute_metadata_maven_minimal(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<name>Maven Default Project</name> | <name>Maven Default Project</name> | ||||
<modelVersion>4.0.0</modelVersion> | <modelVersion>4.0.0</modelVersion> | ||||
<groupId>com.mycompany.app</groupId> | <groupId>com.mycompany.app</groupId> | ||||
<artifactId>my-app</artifactId> | <artifactId>my-app</artifactId> | ||||
<version>1.2.3</version> | <version>1.2.3</version> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "Maven Default Project", | "name": "Maven Default Project", | ||||
"identifier": "com.mycompany.app", | "identifier": "com.mycompany.app", | ||||
"version": "1.2.3", | "version": "1.2.3", | ||||
"codeRepository": ( | "codeRepository": ( | ||||
"https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | ||||
), | ), | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_maven_empty_nodes(self): | def test_compute_metadata_maven_empty_nodes(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<name>Maven Default Project</name> | <name>Maven Default Project</name> | ||||
<modelVersion>4.0.0</modelVersion> | <modelVersion>4.0.0</modelVersion> | ||||
<groupId>com.mycompany.app</groupId> | <groupId>com.mycompany.app</groupId> | ||||
<artifactId>my-app</artifactId> | <artifactId>my-app</artifactId> | ||||
<version>1.2.3</version> | <version>1.2.3</version> | ||||
<repositories> | <repositories> | ||||
</repositories> | </repositories> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "Maven Default Project", | "name": "Maven Default Project", | ||||
"identifier": "com.mycompany.app", | "identifier": "com.mycompany.app", | ||||
"version": "1.2.3", | "version": "1.2.3", | ||||
"codeRepository": ( | "codeRepository": ( | ||||
"https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | ||||
), | ), | ||||
}, | } | ||||
) | |||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<name>Maven Default Project</name> | <name>Maven Default Project</name> | ||||
<modelVersion>4.0.0</modelVersion> | <modelVersion>4.0.0</modelVersion> | ||||
<groupId>com.mycompany.app</groupId> | <groupId>com.mycompany.app</groupId> | ||||
<artifactId>my-app</artifactId> | <artifactId>my-app</artifactId> | ||||
<version></version> | <version></version> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "Maven Default Project", | "name": "Maven Default Project", | ||||
"identifier": "com.mycompany.app", | "identifier": "com.mycompany.app", | ||||
"codeRepository": ( | "codeRepository": ( | ||||
"https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | ||||
), | ), | ||||
}, | } | ||||
) | |||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<name></name> | <name></name> | ||||
<modelVersion>4.0.0</modelVersion> | <modelVersion>4.0.0</modelVersion> | ||||
<groupId>com.mycompany.app</groupId> | <groupId>com.mycompany.app</groupId> | ||||
<artifactId>my-app</artifactId> | <artifactId>my-app</artifactId> | ||||
<version>1.2.3</version> | <version>1.2.3</version> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"identifier": "com.mycompany.app", | "identifier": "com.mycompany.app", | ||||
"version": "1.2.3", | "version": "1.2.3", | ||||
"codeRepository": ( | "codeRepository": ( | ||||
"https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | ||||
), | ), | ||||
}, | } | ||||
) | |||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<name>Maven Default Project</name> | <name>Maven Default Project</name> | ||||
<modelVersion>4.0.0</modelVersion> | <modelVersion>4.0.0</modelVersion> | ||||
<groupId>com.mycompany.app</groupId> | <groupId>com.mycompany.app</groupId> | ||||
<artifactId>my-app</artifactId> | <artifactId>my-app</artifactId> | ||||
<version>1.2.3</version> | <version>1.2.3</version> | ||||
<licenses> | <licenses> | ||||
</licenses> | </licenses> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "Maven Default Project", | "name": "Maven Default Project", | ||||
"identifier": "com.mycompany.app", | "identifier": "com.mycompany.app", | ||||
"version": "1.2.3", | "version": "1.2.3", | ||||
"codeRepository": ( | "codeRepository": ( | ||||
"https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | ||||
), | ), | ||||
}, | } | ||||
) | |||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<groupId></groupId> | <groupId></groupId> | ||||
<version>1.2.3</version> | <version>1.2.3</version> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"version": "1.2.3", | "version": "1.2.3", | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_maven_invalid_licenses(self): | def test_compute_metadata_maven_invalid_licenses(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<name>Maven Default Project</name> | <name>Maven Default Project</name> | ||||
<modelVersion>4.0.0</modelVersion> | <modelVersion>4.0.0</modelVersion> | ||||
<groupId>com.mycompany.app</groupId> | <groupId>com.mycompany.app</groupId> | ||||
<artifactId>my-app</artifactId> | <artifactId>my-app</artifactId> | ||||
<version>1.2.3</version> | <version>1.2.3</version> | ||||
<licenses> | <licenses> | ||||
foo | foo | ||||
</licenses> | </licenses> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "Maven Default Project", | "name": "Maven Default Project", | ||||
"identifier": "com.mycompany.app", | "identifier": "com.mycompany.app", | ||||
"version": "1.2.3", | "version": "1.2.3", | ||||
"codeRepository": ( | "codeRepository": ( | ||||
"https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | "https://repo.maven.apache.org/maven2/com/mycompany/app/my-app" | ||||
), | ), | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_maven_multiple(self): | def test_compute_metadata_maven_multiple(self): | ||||
"""Tests when there are multiple code repos and licenses.""" | """Tests when there are multiple code repos and licenses.""" | ||||
raw_content = b""" | raw_content = b""" | ||||
<project> | <project> | ||||
<name>Maven Default Project</name> | <name>Maven Default Project</name> | ||||
<modelVersion>4.0.0</modelVersion> | <modelVersion>4.0.0</modelVersion> | ||||
<groupId>com.mycompany.app</groupId> | <groupId>com.mycompany.app</groupId> | ||||
Show All 25 Lines | def test_compute_metadata_maven_multiple(self): | ||||
</license> | </license> | ||||
<license> | <license> | ||||
<name>MIT license</name> | <name>MIT license</name> | ||||
<url>https://opensource.org/licenses/MIT</url> | <url>https://opensource.org/licenses/MIT</url> | ||||
</license> | </license> | ||||
</licenses> | </licenses> | ||||
</project>""" | </project>""" | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "Maven Default Project", | "name": "Maven Default Project", | ||||
"identifier": "com.mycompany.app", | "identifier": "com.mycompany.app", | ||||
"version": "1.2.3", | "version": "1.2.3", | ||||
"license": [ | "license": [ | ||||
"https://www.apache.org/licenses/LICENSE-2.0.txt", | "https://www.apache.org/licenses/LICENSE-2.0.txt", | ||||
"https://opensource.org/licenses/MIT", | "https://opensource.org/licenses/MIT", | ||||
], | ], | ||||
"codeRepository": [ | "codeRepository": [ | ||||
"http://repo1.maven.org/maven2/com/mycompany/app/my-app", | "http://repo1.maven.org/maven2/com/mycompany/app/my-app", | ||||
"http://example.org/maven2/com/mycompany/app/my-app", | "http://example.org/maven2/com/mycompany/app/my-app", | ||||
], | ], | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_pkginfo(self): | def test_compute_metadata_pkginfo(self): | ||||
raw_content = b"""\ | raw_content = b"""\ | ||||
Metadata-Version: 2.1 | Metadata-Version: 2.1 | ||||
Name: swh.core | Name: swh.core | ||||
Version: 0.0.49 | Version: 0.0.49 | ||||
Summary: Software Heritage core utilities | Summary: Software Heritage core utilities | ||||
Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ | Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ | ||||
Show All 17 Lines | |||||
Classifier: Intended Audience :: Developers | Classifier: Intended Audience :: Developers | ||||
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) | Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) | ||||
Classifier: Operating System :: OS Independent | Classifier: Operating System :: OS Independent | ||||
Classifier: Development Status :: 5 - Production/Stable | Classifier: Development Status :: 5 - Production/Stable | ||||
Description-Content-Type: text/markdown | Description-Content-Type: text/markdown | ||||
Provides-Extra: testing | Provides-Extra: testing | ||||
""" # noqa | """ # noqa | ||||
result = self.pkginfo_mapping.translate(raw_content) | result = self.pkginfo_mapping.translate(raw_content) | ||||
self.assertCountEqual( | assert result["description"] == [ | ||||
result["description"], | |||||
[ | |||||
"Software Heritage core utilities", # note the comma here | "Software Heritage core utilities", # note the comma here | ||||
"swh-core\n" | "swh-core\n" | ||||
"========\n" | "========\n" | ||||
"\n" | "\n" | ||||
"core library for swh's modules:\n" | "core library for swh's modules:\n" | ||||
"- config parser\n" | "- config parser\n" | ||||
"- hash computations\n" | "- hash computations\n" | ||||
"- serialization\n" | "- serialization\n" | ||||
"- logging mechanism\n" | "- logging mechanism\n" | ||||
"", | "", | ||||
], | ], result | ||||
result, | |||||
) | |||||
del result["description"] | del result["description"] | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"url": "https://forge.softwareheritage.org/diffusion/DCORE/", | "url": "https://forge.softwareheritage.org/diffusion/DCORE/", | ||||
"name": "swh.core", | "name": "swh.core", | ||||
"author": [ | "author": [ | ||||
{ | { | ||||
"type": "Person", | "type": "Person", | ||||
"name": "Software Heritage developers", | "name": "Software Heritage developers", | ||||
"email": "swh-devel@inria.fr", | "email": "swh-devel@inria.fr", | ||||
} | } | ||||
], | ], | ||||
"version": "0.0.49", | "version": "0.0.49", | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_pkginfo_utf8(self): | def test_compute_metadata_pkginfo_utf8(self): | ||||
raw_content = b"""\ | raw_content = b"""\ | ||||
Metadata-Version: 1.1 | Metadata-Version: 1.1 | ||||
Name: snowpyt | Name: snowpyt | ||||
Description-Content-Type: UNKNOWN | Description-Content-Type: UNKNOWN | ||||
Description: foo | Description: foo | ||||
Hydrology N\xc2\xb083 | Hydrology N\xc2\xb083 | ||||
""" # noqa | """ # noqa | ||||
result = self.pkginfo_mapping.translate(raw_content) | result = self.pkginfo_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "snowpyt", | "name": "snowpyt", | ||||
"description": "foo\nHydrology N°83", | "description": "foo\nHydrology N°83", | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_pkginfo_keywords(self): | def test_compute_metadata_pkginfo_keywords(self): | ||||
raw_content = b"""\ | raw_content = b"""\ | ||||
Metadata-Version: 2.1 | Metadata-Version: 2.1 | ||||
Name: foo | Name: foo | ||||
Keywords: foo bar baz | Keywords: foo bar baz | ||||
""" # noqa | """ # noqa | ||||
result = self.pkginfo_mapping.translate(raw_content) | result = self.pkginfo_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "foo", | "name": "foo", | ||||
"keywords": ["foo", "bar", "baz"], | "keywords": ["foo", "bar", "baz"], | ||||
}, | } | ||||
) | |||||
def test_compute_metadata_pkginfo_license(self): | def test_compute_metadata_pkginfo_license(self): | ||||
raw_content = b"""\ | raw_content = b"""\ | ||||
Metadata-Version: 2.1 | Metadata-Version: 2.1 | ||||
Name: foo | Name: foo | ||||
License: MIT | License: MIT | ||||
""" # noqa | """ # noqa | ||||
result = self.pkginfo_mapping.translate(raw_content) | result = self.pkginfo_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "foo", | "name": "foo", | ||||
"license": "MIT", | "license": "MIT", | ||||
}, | } | ||||
) | |||||
def test_gemspec_base(self): | def test_gemspec_base(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
Gem::Specification.new do |s| | Gem::Specification.new do |s| | ||||
s.name = 'example' | s.name = 'example' | ||||
s.version = '0.1.0' | s.version = '0.1.0' | ||||
s.licenses = ['MIT'] | s.licenses = ['MIT'] | ||||
s.summary = "This is an example!" | s.summary = "This is an example!" | ||||
s.description = "Much longer explanation of the example!" | s.description = "Much longer explanation of the example!" | ||||
s.authors = ["Ruby Coder"] | s.authors = ["Ruby Coder"] | ||||
s.email = 'rubycoder@example.com' | s.email = 'rubycoder@example.com' | ||||
s.files = ["lib/example.rb"] | s.files = ["lib/example.rb"] | ||||
s.homepage = 'https://rubygems.org/gems/example' | s.homepage = 'https://rubygems.org/gems/example' | ||||
s.metadata = { "source_code_uri" => "https://github.com/example/example" } | s.metadata = { "source_code_uri" => "https://github.com/example/example" } | ||||
end""" | end""" | ||||
result = self.gemspec_mapping.translate(raw_content) | result = self.gemspec_mapping.translate(raw_content) | ||||
self.assertCountEqual( | assert set(result.pop("description")) == { | ||||
result.pop("description"), | "This is an example!", | ||||
["This is an example!", "Much longer explanation of the example!"], | "Much longer explanation of the example!", | ||||
) | } | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"author": [{"type": "Person", "name": "Ruby Coder"}], | "author": [{"type": "Person", "name": "Ruby Coder"}], | ||||
"name": "example", | "name": "example", | ||||
"license": "https://spdx.org/licenses/MIT", | "license": "https://spdx.org/licenses/MIT", | ||||
"codeRepository": "https://rubygems.org/gems/example", | "codeRepository": "https://rubygems.org/gems/example", | ||||
"email": "rubycoder@example.com", | "email": "rubycoder@example.com", | ||||
"version": "0.1.0", | "version": "0.1.0", | ||||
}, | } | ||||
) | |||||
def test_gemspec_two_author_fields(self): | def test_gemspec_two_author_fields(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
Gem::Specification.new do |s| | Gem::Specification.new do |s| | ||||
s.authors = ["Ruby Coder1"] | s.authors = ["Ruby Coder1"] | ||||
s.author = "Ruby Coder2" | s.author = "Ruby Coder2" | ||||
end""" | end""" | ||||
result = self.gemspec_mapping.translate(raw_content) | result = self.gemspec_mapping.translate(raw_content) | ||||
self.assertCountEqual( | assert result.pop("author") in ( | ||||
result.pop("author"), | |||||
[ | [ | ||||
{"type": "Person", "name": "Ruby Coder1"}, | {"type": "Person", "name": "Ruby Coder1"}, | ||||
{"type": "Person", "name": "Ruby Coder2"}, | {"type": "Person", "name": "Ruby Coder2"}, | ||||
], | ], | ||||
[ | |||||
{"type": "Person", "name": "Ruby Coder2"}, | |||||
{"type": "Person", "name": "Ruby Coder1"}, | |||||
], | |||||
) | ) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
def test_gemspec_invalid_author(self): | def test_gemspec_invalid_author(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
Gem::Specification.new do |s| | Gem::Specification.new do |s| | ||||
s.author = ["Ruby Coder"] | s.author = ["Ruby Coder"] | ||||
end""" | end""" | ||||
result = self.gemspec_mapping.translate(raw_content) | result = self.gemspec_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
raw_content = b""" | raw_content = b""" | ||||
Gem::Specification.new do |s| | Gem::Specification.new do |s| | ||||
s.author = "Ruby Coder1", | s.author = "Ruby Coder1", | ||||
end""" | end""" | ||||
result = self.gemspec_mapping.translate(raw_content) | result = self.gemspec_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
}, | } | ||||
) | |||||
raw_content = b""" | raw_content = b""" | ||||
Gem::Specification.new do |s| | Gem::Specification.new do |s| | ||||
s.authors = ["Ruby Coder1", ["Ruby Coder2"]] | s.authors = ["Ruby Coder1", ["Ruby Coder2"]] | ||||
end""" | end""" | ||||
result = self.gemspec_mapping.translate(raw_content) | result = self.gemspec_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"author": [{"type": "Person", "name": "Ruby Coder1"}], | "author": [{"type": "Person", "name": "Ruby Coder1"}], | ||||
}, | } | ||||
) | |||||
def test_gemspec_alternative_header(self): | def test_gemspec_alternative_header(self): | ||||
raw_content = b""" | raw_content = b""" | ||||
require './lib/version' | require './lib/version' | ||||
Gem::Specification.new { |s| | Gem::Specification.new { |s| | ||||
s.name = 'rb-system-with-aliases' | s.name = 'rb-system-with-aliases' | ||||
s.summary = 'execute system commands with aliases' | s.summary = 'execute system commands with aliases' | ||||
} | } | ||||
""" | """ | ||||
result = self.gemspec_mapping.translate(raw_content) | result = self.gemspec_mapping.translate(raw_content) | ||||
self.assertEqual( | assert result == { | ||||
result, | |||||
{ | |||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"type": "SoftwareSourceCode", | "type": "SoftwareSourceCode", | ||||
"name": "rb-system-with-aliases", | "name": "rb-system-with-aliases", | ||||
"description": "execute system commands with aliases", | "description": "execute system commands with aliases", | ||||
}, | } | ||||
) | |||||
@settings(suppress_health_check=[HealthCheck.too_slow]) | @settings(suppress_health_check=[HealthCheck.too_slow]) | ||||
@given(json_document_strategy(keys=list(NpmMapping.mapping))) | @given(json_document_strategy(keys=list(NpmMapping.mapping))) | ||||
def test_npm_adversarial(self, doc): | def test_npm_adversarial(self, doc): | ||||
raw = json.dumps(doc).encode() | raw = json.dumps(doc).encode() | ||||
self.npm_mapping.translate(raw) | self.npm_mapping.translate(raw) | ||||
@settings(suppress_health_check=[HealthCheck.too_slow]) | @settings(suppress_health_check=[HealthCheck.too_slow]) | ||||
Show All 29 Lines | """ | ||||
) | ) | ||||
def test_gemspec_adversarial(self, doc): | def test_gemspec_adversarial(self, doc): | ||||
parts = [b"Gem::Specification.new do |s|\n"] | parts = [b"Gem::Specification.new do |s|\n"] | ||||
for (k, v) in doc.items(): | for (k, v) in doc.items(): | ||||
parts.append(" s.{} = {}\n".format(k, repr(v)).encode()) | parts.append(" s.{} = {}\n".format(k, repr(v)).encode()) | ||||
parts.append(b"end\n") | parts.append(b"end\n") | ||||
self.gemspec_mapping.translate(b"".join(parts)) | self.gemspec_mapping.translate(b"".join(parts)) | ||||
def test_revision_metadata_indexer(self): | def test_directory_metadata_indexer(self): | ||||
metadata_indexer = RevisionMetadataIndexer(config=REVISION_METADATA_CONFIG) | metadata_indexer = DirectoryMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) | ||||
fill_obj_storage(metadata_indexer.objstorage) | fill_obj_storage(metadata_indexer.objstorage) | ||||
fill_storage(metadata_indexer.storage) | fill_storage(metadata_indexer.storage) | ||||
tool = metadata_indexer.idx_storage.indexer_configuration_get( | tool = metadata_indexer.idx_storage.indexer_configuration_get( | ||||
{f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} | {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} | ||||
) | ) | ||||
assert tool is not None | assert tool is not None | ||||
rev = REVISION | dir_ = DIRECTORY2 | ||||
assert rev.directory == DIRECTORY2.id | |||||
metadata_indexer.idx_storage.content_metadata_add( | metadata_indexer.idx_storage.content_metadata_add( | ||||
[ | [ | ||||
ContentMetadataRow( | ContentMetadataRow( | ||||
id=DIRECTORY2.entries[0].target, | id=DIRECTORY2.entries[0].target, | ||||
indexer_configuration_id=tool["id"], | indexer_configuration_id=tool["id"], | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
) | ) | ||||
] | ] | ||||
) | ) | ||||
metadata_indexer.run([rev.id]) | metadata_indexer.run([dir_.id]) | ||||
results = list( | results = list( | ||||
metadata_indexer.idx_storage.revision_intrinsic_metadata_get([REVISION.id]) | metadata_indexer.idx_storage.directory_intrinsic_metadata_get( | ||||
[DIRECTORY2.id] | |||||
) | |||||
) | ) | ||||
expected_results = [ | expected_results = [ | ||||
RevisionIntrinsicMetadataRow( | DirectoryIntrinsicMetadataRow( | ||||
id=rev.id, | id=dir_.id, | ||||
tool=TRANSLATOR_TOOL, | tool=TRANSLATOR_TOOL, | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
) | ) | ||||
] | ] | ||||
for result in results: | for result in results: | ||||
del result.tool["id"] | del result.tool["id"] | ||||
# then | # then | ||||
self.assertEqual(results, expected_results) | assert results == expected_results | ||||
def test_revision_metadata_indexer_single_root_dir(self): | def test_directory_metadata_indexer_single_root_dir(self): | ||||
metadata_indexer = RevisionMetadataIndexer(config=REVISION_METADATA_CONFIG) | metadata_indexer = DirectoryMetadataIndexer(config=DIRECTORY_METADATA_CONFIG) | ||||
fill_obj_storage(metadata_indexer.objstorage) | fill_obj_storage(metadata_indexer.objstorage) | ||||
fill_storage(metadata_indexer.storage) | fill_storage(metadata_indexer.storage) | ||||
# Add a parent directory, that is the only directory at the root | # Add a parent directory, that is the only directory at the root | ||||
# of the revision | # of the directory | ||||
rev = REVISION | dir_ = DIRECTORY2 | ||||
assert rev.directory == DIRECTORY2.id | |||||
directory = Directory( | new_dir = Directory( | ||||
entries=( | entries=( | ||||
DirectoryEntry( | DirectoryEntry( | ||||
name=b"foobar-1.0.0", | name=b"foobar-1.0.0", | ||||
type="dir", | type="dir", | ||||
target=rev.directory, | target=dir_.id, | ||||
perms=16384, | perms=16384, | ||||
), | ), | ||||
), | ), | ||||
) | ) | ||||
assert directory.id is not None | assert new_dir.id is not None | ||||
metadata_indexer.storage.directory_add([directory]) | metadata_indexer.storage.directory_add([new_dir]) | ||||
new_rev_dict = {**rev.to_dict(), "directory": directory.id} | |||||
new_rev_dict.pop("id") | |||||
new_rev = Revision.from_dict(new_rev_dict) | |||||
metadata_indexer.storage.revision_add([new_rev]) | |||||
tool = metadata_indexer.idx_storage.indexer_configuration_get( | tool = metadata_indexer.idx_storage.indexer_configuration_get( | ||||
{f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} | {f"tool_{k}": v for (k, v) in TRANSLATOR_TOOL.items()} | ||||
) | ) | ||||
assert tool is not None | assert tool is not None | ||||
metadata_indexer.idx_storage.content_metadata_add( | metadata_indexer.idx_storage.content_metadata_add( | ||||
[ | [ | ||||
ContentMetadataRow( | ContentMetadataRow( | ||||
id=DIRECTORY2.entries[0].target, | id=DIRECTORY2.entries[0].target, | ||||
indexer_configuration_id=tool["id"], | indexer_configuration_id=tool["id"], | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
) | ) | ||||
] | ] | ||||
) | ) | ||||
metadata_indexer.run([new_rev.id]) | metadata_indexer.run([new_dir.id]) | ||||
results = list( | results = list( | ||||
metadata_indexer.idx_storage.revision_intrinsic_metadata_get([new_rev.id]) | metadata_indexer.idx_storage.directory_intrinsic_metadata_get([new_dir.id]) | ||||
) | ) | ||||
expected_results = [ | expected_results = [ | ||||
RevisionIntrinsicMetadataRow( | DirectoryIntrinsicMetadataRow( | ||||
id=new_rev.id, | id=new_dir.id, | ||||
tool=TRANSLATOR_TOOL, | tool=TRANSLATOR_TOOL, | ||||
metadata=YARN_PARSER_METADATA, | metadata=YARN_PARSER_METADATA, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
) | ) | ||||
] | ] | ||||
for result in results: | for result in results: | ||||
del result.tool["id"] | del result.tool["id"] | ||||
# then | # then | ||||
self.assertEqual(results, expected_results) | assert results == expected_results |
You can simplify this example to keep only what matters to the test; that makes the test more readable. (Right now, I don't see which part of this file is the syntax error.)