Page MenuHomeSoftware Heritage

D957.id3051.diff
No OneTemporary

D957.id3051.diff

diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -25,6 +25,36 @@
return cls
+def merge_values(v1, v2):
+    """Merge two values translated to the same key into a single value.
+
+    If v1 and v2 are of the form `{"@list": l1}` and `{"@list": l2}`,
+    returns `{"@list": l1 + l2}`.
+    Otherwise, make them lists (if they are not already) and concatenate
+    them.
+
+    Raises ValueError if a dict containing `@list` is mixed with any
+    other kind of value, has keys other than `@list`, or does not hold
+    a list.
+
+    >>> merge_values('a', 'b')
+    ['a', 'b']
+    >>> merge_values(['a', 'b'], 'c')
+    ['a', 'b', 'c']
+    >>> merge_values({'@list': ['a', 'b']}, {'@list': ['c']})
+    {'@list': ['a', 'b', 'c']}
+    """
+    values = (v1, v2)
+    if any(isinstance(v, dict) and '@list' in v for v in values):
+        # `@list` dicts only merge with each other, and both sides are
+        # held to the same rule: exactly `{"@list": [...]}`. The check
+        # raises explicitly instead of using `assert`, which is
+        # stripped when Python runs with -O.
+        for v in values:
+            if not (isinstance(v, dict) and set(v) == {'@list'}
+                    and isinstance(v['@list'], list)):
+                raise ValueError('Cannot merge %r and %r' % (v1, v2))
+        return {'@list': v1['@list'] + v2['@list']}
+    if not isinstance(v1, list):
+        v1 = [v1]
+    if not isinstance(v2, list):
+        v2 = [v2]
+    return v1 + v2
+
+
class BaseMapping(metaclass=abc.ABCMeta):
"""Base class for mappings to inherit from
@@ -109,6 +139,7 @@
elif k in self.mapping:
# if there is no method, but the key is known from the
# crosswalk table
+ codemeta_key = self.mapping[k]
# if there is a normalization method, use it on the value
normalization_method = getattr(
@@ -117,7 +148,11 @@
v = normalization_method(v)
# set the translation metadata with the normalized value
- translated_metadata[self.mapping[k]] = v
+ if codemeta_key in translated_metadata:
+ translated_metadata[codemeta_key] = merge_values(
+ translated_metadata[codemeta_key], v)
+ else:
+ translated_metadata[codemeta_key] = v
if normalize:
return self.normalize_translation(translated_metadata)
else:
@@ -391,14 +426,6 @@
}
return self.normalize_translation(metadata)
- def translate_summary(self, translated_metadata, v):
- k = self.mapping['summary']
- translated_metadata.setdefault(k, []).append(v)
-
- def translate_description(self, translated_metadata, v):
- k = self.mapping['description']
- translated_metadata.setdefault(k, []).append(v)
-
def normalize_home_page(self, urls):
return [{'@id': url} for url in urls]
@@ -468,21 +495,11 @@
for license in licenses
if isinstance(license, str)]
- def translate_author(self, translated_metadata, v):
- k = self.mapping['author']
- translated_metadata.setdefault(k, {"@list": []})["@list"].append(v)
-
- def translate_authors(self, translated_metadata, v):
- k = self.mapping['authors']
- translated_metadata.setdefault(k, {"@list": []})["@list"].extend(v)
-
- def translate_summary(self, translated_metadata, v):
- k = self.mapping['summary']
- translated_metadata.setdefault(k, []).append(v)
+ def normalize_author(self, author):
+ """Wrap a single author in a `{"@list": [...]}` dict, the form
+ that `merge_values` concatenates with other `@list` dicts."""
+ return {"@list": [author]}
- def translate_description(self, translated_metadata, v):
- k = self.mapping['description']
- translated_metadata.setdefault(k, []).append(v)
+ def normalize_authors(self, authors):
+ """Wrap the given authors in a `{"@list": ...}` dict, the form
+ that `merge_values` concatenates with other `@list` dicts."""
+ return {"@list": authors}
def main():
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -7,7 +7,8 @@
from swh.model.hashutil import hash_to_bytes
-from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
+from swh.indexer.metadata_dictionary import (
+ CROSSWALK_TABLE, MAPPINGS, merge_values)
from swh.indexer.metadata_detector import (
detect_metadata, extract_minimal_metadata_dict
)
@@ -82,6 +83,31 @@
'homepage': 'http://schema.org/url'
})
+ def test_merge_values(self):
+ """Check that merge_values concatenates plain values and
+ `@list` dicts, and refuses to mix the two kinds."""
+ # plain values and lists end up in a single flat list
+ self.assertEqual(
+ merge_values('a', 'b'),
+ ['a', 'b'])
+ self.assertEqual(
+ merge_values(['a', 'b'], 'c'),
+ ['a', 'b', 'c'])
+ self.assertEqual(
+ merge_values('a', ['b', 'c']),
+ ['a', 'b', 'c'])
+ # two `@list` dicts are merged into one `@list` dict
+ self.assertEqual(
+ merge_values({'@list': ['a']}, {'@list': ['b']}),
+ {'@list': ['a', 'b']})
+ self.assertEqual(
+ merge_values({'@list': ['a', 'b']}, {'@list': ['c']}),
+ {'@list': ['a', 'b', 'c']})
+ # an `@list` dict combined with a plain value or list is rejected,
+ # whichever side it is on
+ with self.assertRaises(ValueError):
+ merge_values({'@list': ['a']}, 'b')
+ with self.assertRaises(ValueError):
+ merge_values('a', {'@list': ['b']})
+ with self.assertRaises(ValueError):
+ merge_values({'@list': ['a']}, ['b'])
+ with self.assertRaises(ValueError):
+ merge_values(['a'], {'@list': ['b']})
+
def test_compute_metadata_none(self):
"""
testing content empty content is empty

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 7:09 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225735

Event Timeline