Page Menu | Home | Software Heritage

D2504.id8903.diff
No One | Temporary

D2504.id8903.diff

diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -140,8 +140,19 @@
merged_document = collections.defaultdict(list)
for document in documents:
for (key, values) in document.items():
- for value in values:
- if value not in merged_document[key]:
- merged_document[key].append(value)
+ if key == '@id':
+ # @id does not get expanded to a list
+ value = values
+
+ # Only one @id is allowed: keep the first, move any later, different @id to sameAs
+ if '@id' not in merged_document:
+ merged_document['@id'] = value
+ elif value != merged_document['@id']:
+ if value not in merged_document[SCHEMA_URI + 'sameAs']:
+ merged_document[SCHEMA_URI + 'sameAs'].append(value)
+ else:
+ for value in values:
+ if value not in merged_document[key]:
+ merged_document[key].append(value)
return compact(merged_document)
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -237,6 +237,58 @@
}
self.assertEqual(expected_results, results)
+ def test_merge_documents_ids(self):
+ # given
+ metadata_list = [{
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'id': 'http://example.org/test1',
+ 'name': 'test_1',
+ }, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'id': 'http://example.org/test2',
+ 'name': 'test_2',
+ }]
+
+ # when
+ results = merge_documents(metadata_list)
+
+ # then
+ expected_results = {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'id': 'http://example.org/test1',
+ 'schema:sameAs': 'http://example.org/test2',
+ "name": ['test_1', 'test_2']
+ }
+ self.assertEqual(expected_results, results)
+
+ def test_merge_documents_duplicate_ids(self):
+ # given
+ metadata_list = [{
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'id': 'http://example.org/test1',
+ 'name': 'test_1',
+ }, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'id': 'http://example.org/test1',
+ 'name': 'test_1b',
+ }, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'id': 'http://example.org/test2',
+ 'name': 'test_2',
+ }]
+
+ # when
+ results = merge_documents(metadata_list)
+
+ # then
+ expected_results = {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'id': 'http://example.org/test1',
+ 'schema:sameAs': 'http://example.org/test2',
+ "name": ['test_1', 'test_1b', 'test_2']
+ }
+ self.assertEqual(expected_results, results)
+
def test_index_content_metadata_npm(self):
"""
testing NPM with package.json

File Metadata

Mime Type
text/plain
Expires
Jul 27 2024, 10:36 PM (11 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216532

Event Timeline