Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/codemeta.py
def merge_documents(documents):
    """Merge several metadata dicts into one compacted document.

    Every input document (each generated from a different metadata file)
    is passed through ``expand``; the resulting expanded documents are then
    folded key by key into a single mapping, and that mapping is handed to
    ``compact`` to produce the return value.

    Duplicate values under the same key are dropped. The ``@id`` key is
    handled separately: the first ``@id`` met is kept as the identifier of
    the merged document, and any later, different ``@id`` is recorded under
    ``SCHEMA_URI + 'sameAs'`` instead.
    """
    # Flatten eagerly: each call to expand() yields an iterable of documents.
    expanded_docs = [
        doc
        for expansion in map(expand, documents)
        for doc in expansion
    ]

    merged = collections.defaultdict(list)
    for doc in expanded_docs:
        for prop, entries in doc.items():
            if prop != '@id':
                # Regular property: append each value not already present.
                # The defaultdict entry is only touched when there is at
                # least one value, so empty lists do not create keys.
                for entry in entries:
                    known = merged[prop]
                    if entry not in known:
                        known.append(entry)
                continue

            # @id does not get expanded to a list, so it is used as-is.
            identifier = entries

            # Only one @id is allowed: the first one wins.
            if '@id' not in merged:
                merged['@id'] = identifier
                continue
            if identifier == merged['@id']:
                continue

            # A different @id is moved to sameAs, without duplicates.
            aliases = merged[SCHEMA_URI + 'sameAs']
            if identifier not in aliases:
                aliases.append(identifier)

    return compact(merged)