Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7343074
D957.id3051.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D957.id3051.diff
View Options
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -25,6 +25,36 @@
return cls
+def merge_values(v1, v2):
+ """If v1 and v2 are of the form `{"@list": l1}` and `{"@list": l2}`,
+ returns `{"@list": l1 + l2}`.
+ Otherwise, make them lists (if they are not already) and concatenate
+ them.
+
+ >>> merge_values('a', 'b')
+ ['a', 'b']
+ >>> merge_values(['a', 'b'], 'c')
+ ['a', 'b', 'c']
+ >>> merge_values({'@list': ['a', 'b']}, {'@list': ['c']})
+ {'@list': ['a', 'b', 'c']}
+ """
+ if isinstance(v1, dict) and set(v1) == {'@list'}:
+ assert isinstance(v1['@list'], list)
+ if isinstance(v2, dict) and set(v2) == {'@list'}:
+ assert isinstance(v2['@list'], list)
+ return {'@list': v1['@list'] + v2['@list']}
+ else:
+ raise ValueError('Cannot merge %r and %r' % (v1, v2))
+ else:
+ if isinstance(v2, dict) and '@list' in v2:
+ raise ValueError('Cannot merge %r and %r' % (v1, v2))
+ if not isinstance(v1, list):
+ v1 = [v1]
+ if not isinstance(v2, list):
+ v2 = [v2]
+ return v1 + v2
+
+
class BaseMapping(metaclass=abc.ABCMeta):
"""Base class for mappings to inherit from
@@ -109,6 +139,7 @@
elif k in self.mapping:
# if there is no method, but the key is known from the
# crosswalk table
+ codemeta_key = self.mapping[k]
# if there is a normalization method, use it on the value
normalization_method = getattr(
@@ -117,7 +148,11 @@
v = normalization_method(v)
# set the translation metadata with the normalized value
- translated_metadata[self.mapping[k]] = v
+ if codemeta_key in translated_metadata:
+ translated_metadata[codemeta_key] = merge_values(
+ translated_metadata[codemeta_key], v)
+ else:
+ translated_metadata[codemeta_key] = v
if normalize:
return self.normalize_translation(translated_metadata)
else:
@@ -391,14 +426,6 @@
}
return self.normalize_translation(metadata)
- def translate_summary(self, translated_metadata, v):
- k = self.mapping['summary']
- translated_metadata.setdefault(k, []).append(v)
-
- def translate_description(self, translated_metadata, v):
- k = self.mapping['description']
- translated_metadata.setdefault(k, []).append(v)
-
def normalize_home_page(self, urls):
return [{'@id': url} for url in urls]
@@ -468,21 +495,11 @@
for license in licenses
if isinstance(license, str)]
- def translate_author(self, translated_metadata, v):
- k = self.mapping['author']
- translated_metadata.setdefault(k, {"@list": []})["@list"].append(v)
-
- def translate_authors(self, translated_metadata, v):
- k = self.mapping['authors']
- translated_metadata.setdefault(k, {"@list": []})["@list"].extend(v)
-
- def translate_summary(self, translated_metadata, v):
- k = self.mapping['summary']
- translated_metadata.setdefault(k, []).append(v)
+ def normalize_author(self, author):
+ return {"@list": [author]}
- def translate_description(self, translated_metadata, v):
- k = self.mapping['description']
- translated_metadata.setdefault(k, []).append(v)
+ def normalize_authors(self, authors):
+ return {"@list": authors}
def main():
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -7,7 +7,8 @@
from swh.model.hashutil import hash_to_bytes
-from swh.indexer.metadata_dictionary import CROSSWALK_TABLE, MAPPINGS
+from swh.indexer.metadata_dictionary import (
+ CROSSWALK_TABLE, MAPPINGS, merge_values)
from swh.indexer.metadata_detector import (
detect_metadata, extract_minimal_metadata_dict
)
@@ -82,6 +83,31 @@
'homepage': 'http://schema.org/url'
})
+ def test_merge_values(self):
+ self.assertEqual(
+ merge_values('a', 'b'),
+ ['a', 'b'])
+ self.assertEqual(
+ merge_values(['a', 'b'], 'c'),
+ ['a', 'b', 'c'])
+ self.assertEqual(
+ merge_values('a', ['b', 'c']),
+ ['a', 'b', 'c'])
+ self.assertEqual(
+ merge_values({'@list': ['a']}, {'@list': ['b']}),
+ {'@list': ['a', 'b']})
+ self.assertEqual(
+ merge_values({'@list': ['a', 'b']}, {'@list': ['c']}),
+ {'@list': ['a', 'b', 'c']})
+ with self.assertRaises(ValueError):
+ merge_values({'@list': ['a']}, 'b')
+ with self.assertRaises(ValueError):
+ merge_values('a', {'@list': ['b']})
+ with self.assertRaises(ValueError):
+ merge_values({'@list': ['a']}, ['b'])
+ with self.assertRaises(ValueError):
+ merge_values(['a'], {'@list': ['b']})
+
def test_compute_metadata_none(self):
"""
testing content empty content is empty
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 7:09 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225735
Attached To
D957: Factorize list merges in indexer mappings
Event Timeline
Log In to Comment