diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -43,7 +43,7 @@
} for sha1 in ids
))
- def index(self, id, data):
+ def index(self, id, data, log_suffix='unknown revision'):
"""Index sha1s' content and store result.
Args:
@@ -63,8 +63,9 @@
}
try:
mapping_name = self.tool['tool_configuration']['context']
- result['translated_metadata'] = MAPPINGS[mapping_name] \
- .translate(data)
+ log_suffix += ', content_id=%s' % hashutil.hash_to_hex(id)
+ result['translated_metadata'] = \
+ MAPPINGS[mapping_name](log_suffix).translate(data)
except Exception:
self.log.exception(
"Problem during metadata translation "
@@ -111,7 +112,7 @@
'version': '0.0.2',
'configuration': {
'type': 'local',
- 'context': ['NpmMapping', 'CodemetaMapping']
+ 'context': list(MAPPINGS),
},
}),
}
@@ -158,7 +159,9 @@
files = [entry for entry in dir_ls if entry['type'] == 'file']
detected_files = detect_metadata(files)
result['translated_metadata'] = self.translate_revision_metadata(
- detected_files)
+ detected_files,
+ log_suffix='revision=%s' % hashutil.hash_to_hex(rev['id'])
+ )
except Exception as e:
self.log.exception(
'Problem when indexing rev: %r', e)
@@ -181,7 +184,7 @@
self.idx_storage.revision_metadata_add(
results, conflict_update=(policy_update == 'update-dups'))
- def translate_revision_metadata(self, detected_files):
+ def translate_revision_metadata(self, detected_files, log_suffix):
"""
Determine plan of action to translate metadata when containing
one or multiple detected files:
@@ -236,7 +239,8 @@
# content indexing
try:
c_metadata_indexer.run(sha1s_filtered,
- policy_update='ignore-dups')
+ policy_update='ignore-dups',
+ log_suffix=log_suffix)
# on the fly possibility:
for result in c_metadata_indexer.results:
local_metadata = result['translated_metadata']
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -24,7 +24,7 @@
def register_mapping(cls):
- MAPPINGS[cls.__name__] = cls()
+ MAPPINGS[cls.__name__] = cls
return cls
@@ -70,13 +70,15 @@
- inherit this class
- override translate function
"""
- def __init__(self):
+ def __init__(self, log_suffix=''):
+ self.log_suffix = log_suffix
self.log = logging.getLogger('%s.%s' % (
self.__class__.__module__,
self.__class__.__name__))
+ @classmethod
@abc.abstractmethod
- def detect_metadata_files(self, files):
+ def detect_metadata_files(cls, files):
"""
Detects files potentially containing metadata
@@ -105,9 +107,10 @@
"""The .json file to extract metadata from."""
pass
- def detect_metadata_files(self, file_entries):
+ @classmethod
+ def detect_metadata_files(cls, file_entries):
for entry in file_entries:
- if entry['name'] == self.filename:
+ if entry['name'] == cls.filename:
return [entry['sha1']]
return []
@@ -185,12 +188,12 @@
try:
raw_content = raw_content.decode()
except UnicodeDecodeError:
- self.log.warning('Error unidecoding %r', raw_content)
+ self.log.warning('Error unidecoding from %s', self.log_suffix)
return
try:
content_dict = json.loads(raw_content)
except json.JSONDecodeError:
- self.log.warning('Error unjsoning %r' % raw_content)
+ self.log.warning('Error unjsoning from %s', self.log_suffix)
return
return self.translate_dict(content_dict)
@@ -356,7 +359,7 @@
try:
d = xmltodict.parse(content).get('project') or {}
except xml.parsers.expat.ExpatError:
- self.log.warning('Error parsing XML of %r', content)
+ self.log.warning('Error parsing XML from %s', self.log_suffix)
return None
metadata = self.translate_dict(d, normalize=False)
metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d)
@@ -522,7 +525,8 @@
mapping = CROSSWALK_TABLE['Ruby Gem']
- def detect_metadata_files(self, file_entries):
+ @classmethod
+ def detect_metadata_files(cls, file_entries):
for entry in file_entries:
if entry['name'].endswith(b'.gemspec'):
return [entry['sha1']]
@@ -532,7 +536,7 @@
try:
raw_content = raw_content.decode()
except UnicodeDecodeError:
- self.log.warning('Error unidecoding %r', raw_content)
+ self.log.warning('Error unidecoding from %s', self.log_suffix)
return
# Skip lines before 'Gem::Specification.new'
@@ -543,8 +547,8 @@
try:
next(lines) # Consume 'Gem::Specification.new'
except StopIteration:
- self.log.warning('Could not find Gem::Specification in %r',
- raw_content)
+ self.log.warning('Could not find Gem::Specification in %s',
+ self.log_suffix)
return
content_dict = {}
@@ -617,19 +621,3 @@
if isinstance(authors, list):
return {"@list": [author for author in authors
if isinstance(author, str)]}
-
-
-def main():
- raw_content = """{"name": "test_name", "unknown_term": "ut"}"""
- raw_content1 = b"""{"name": "test_name",
- "unknown_term": "ut",
- "prerequisites" :"packageXYZ"}"""
- result = MAPPINGS["NpmMapping"].translate(raw_content)
- result1 = MAPPINGS["MavenMapping"].translate(raw_content1)
-
- print(result)
- print(result1)
-
-
-if __name__ == "__main__":
- main()
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -129,7 +129,7 @@
# None if no metadata was found or an error occurred
declared_metadata = None
# when
- result = MAPPINGS["NpmMapping"].translate(content)
+ result = MAPPINGS["NpmMapping"]().translate(content)
# then
self.assertEqual(declared_metadata, result)
@@ -169,7 +169,7 @@
}
# when
- result = MAPPINGS["NpmMapping"].translate(content)
+ result = MAPPINGS["NpmMapping"]().translate(content)
# then
self.assertEqual(declared_metadata, result)
@@ -294,7 +294,7 @@
"email": "foo@example.com"
}
}"""
- result = MAPPINGS["NpmMapping"].translate(package_json)
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'foo',
@@ -309,7 +309,7 @@
"email": "foo@example.com"
}
}"""
- result = MAPPINGS["NpmMapping"].translate(package_json)
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'foo',
@@ -321,7 +321,7 @@
"name": "foo",
"bugs": "https://github.com/owner/project/issues"
}"""
- result = MAPPINGS["NpmMapping"].translate(package_json)
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'foo',
@@ -338,7 +338,7 @@
"url" : "https://github.com/npm/cli.git"
}
}"""
- result = MAPPINGS["NpmMapping"].translate(package_json)
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'foo',
@@ -353,7 +353,7 @@
"type" : "git"
}
}"""
- result = MAPPINGS["NpmMapping"].translate(package_json)
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'foo',
@@ -365,7 +365,7 @@
"name": "foo",
"repository": "github:npm/cli"
}"""
- result = MAPPINGS["NpmMapping"].translate(package_json)
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
expected_result = {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'foo',
@@ -379,7 +379,7 @@
"name": "foo",
"repository": "npm/cli"
}"""
- result = MAPPINGS["NpmMapping"].translate(package_json)
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
self.assertEqual(result, expected_result)
# gitlab shortcut
@@ -387,7 +387,7 @@
"name": "foo",
"repository": "gitlab:user/repo"
}"""
- result = MAPPINGS["NpmMapping"].translate(package_json)
+ result = MAPPINGS["NpmMapping"]().translate(package_json)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'name': 'foo',
@@ -541,7 +541,7 @@
"datePublished": "2017-06-05",
"programmingLanguage": "JSON-LD"
}
- result = MAPPINGS["CodemetaMapping"].translate(raw_content)
+ result = MAPPINGS["CodemetaMapping"]().translate(raw_content)
self.assertEqual(result, expected_result)
def test_compute_metadata_maven(self):
@@ -572,7 +572,7 @@
"""
- result = MAPPINGS["MavenMapping"].translate(raw_content)
+ result = MAPPINGS["MavenMapping"]().translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
@@ -588,7 +588,7 @@
raw_content = b"""
"""
- result = MAPPINGS["MavenMapping"].translate(raw_content)
+ result = MAPPINGS["MavenMapping"]().translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
@@ -599,20 +599,29 @@
"""
- result = MAPPINGS["MavenMapping"].translate(raw_content)
+ result = MAPPINGS["MavenMapping"]().translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
})
def test_compute_metadata_maven_invalid_xml(self):
+ expected_warning = (
+ 'WARNING:swh.indexer.metadata_dictionary.MavenMapping:'
+ 'Error parsing XML from foo')
raw_content = b"""
"""
- result = MAPPINGS["MavenMapping"].translate(raw_content)
+ with self.assertLogs('swh.indexer.metadata_dictionary',
+ level='WARNING') as cm:
+ result = MAPPINGS["MavenMapping"]('foo').translate(raw_content)
+ self.assertEqual(cm.output, [expected_warning])
self.assertEqual(result, None)
raw_content = b"""
"""
- result = MAPPINGS["MavenMapping"].translate(raw_content)
+ with self.assertLogs('swh.indexer.metadata_dictionary',
+ level='WARNING') as cm:
+ result = MAPPINGS["MavenMapping"]('foo').translate(raw_content)
+ self.assertEqual(cm.output, [expected_warning])
self.assertEqual(result, None)
def test_compute_metadata_maven_minimal(self):
@@ -624,7 +633,7 @@
my-app
1.2.3
"""
- result = MAPPINGS["MavenMapping"].translate(raw_content)
+ result = MAPPINGS["MavenMapping"]().translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
@@ -674,7 +683,7 @@
"""
- result = MAPPINGS["MavenMapping"].translate(raw_content)
+ result = MAPPINGS["MavenMapping"]().translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
@@ -722,7 +731,7 @@
Description-Content-Type: text/markdown
Provides-Extra: testing
""") # noqa
- result = MAPPINGS["PythonPkginfoMapping"].translate(raw_content)
+ result = MAPPINGS["PythonPkginfoMapping"]().translate(raw_content)
self.assertCountEqual(result['description'], [
'Software Heritage core utilities', # note the comma here
'swh-core\n'
@@ -771,7 +780,7 @@
Name: foo
License: MIT
""") # noqa
- result = MAPPINGS["PythonPkginfoMapping"].translate(raw_content)
+ result = MAPPINGS["PythonPkginfoMapping"]().translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
@@ -793,7 +802,7 @@
s.homepage = 'https://rubygems.org/gems/example'
s.metadata = { "source_code_uri" => "https://github.com/example/example" }
end"""
- result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ result = MAPPINGS['GemspecMapping']().translate(raw_content)
self.assertCountEqual(result.pop('description'), [
"This is an example!",
"Much longer explanation of the example!"
@@ -815,7 +824,7 @@
s.authors = ["Ruby Coder1"]
s.author = "Ruby Coder2"
end"""
- result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ result = MAPPINGS['GemspecMapping']().translate(raw_content)
self.assertCountEqual(result.pop('author'), [
'Ruby Coder1', 'Ruby Coder2'])
self.assertEqual(result, {
@@ -828,7 +837,7 @@
Gem::Specification.new do |s|
s.author = ["Ruby Coder"]
end"""
- result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ result = MAPPINGS['GemspecMapping']().translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
@@ -837,7 +846,7 @@
Gem::Specification.new do |s|
s.author = "Ruby Coder1",
end"""
- result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ result = MAPPINGS['GemspecMapping']().translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
@@ -846,7 +855,7 @@
Gem::Specification.new do |s|
s.authors = ["Ruby Coder1", ["Ruby Coder2"]]
end"""
- result = MAPPINGS['GemspecMapping'].translate(raw_content)
+ result = MAPPINGS['GemspecMapping']().translate(raw_content)
self.assertEqual(result, {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',