Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/codemeta.py
# Copyright (C) 2018 The Software Heritage developers | # Copyright (C) 2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import collections | |||||
import csv | import csv | ||||
import itertools | |||||
import json | import json | ||||
import os.path | import os.path | ||||
import re | import re | ||||
import swh.indexer | import swh.indexer | ||||
from pyld import jsonld | from pyld import jsonld | ||||
_DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data') | _DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data') | ||||
▲ Show 20 Lines • Show All 107 Lines • ▼ Show 20 Lines | def compact(doc): | ||||
return jsonld.compact(doc, CODEMETA_CONTEXT_URL, | return jsonld.compact(doc, CODEMETA_CONTEXT_URL, | ||||
options={'documentLoader': _document_loader}) | options={'documentLoader': _document_loader}) | ||||
def expand(doc):
    """Run `pyld.jsonld.expand` on ``doc`` in the context of CodeMeta.

    The module's ``_document_loader`` is passed to PyLD as its
    ``documentLoader`` option.
    """
    loader_options = {'documentLoader': _document_loader}
    return jsonld.expand(doc, options=loader_options)
def merge_documents(documents):
    """Merge several metadata documents into a single document.

    Every input document is passed through :func:`expand`; the resulting
    nodes are then merged key by key and the result is handed to
    :func:`compact`.

    Values that are lists are concatenated, keeping each distinct value only
    once, in first-seen order. Values that are *not* lists (in expanded
    JSON-LD this is the case for the ``@id`` keyword, which stays a plain
    string) are kept whole: the first one encountered wins and later ones
    are ignored, because a node can only carry a single such value.

    Args:
        documents: iterable of metadata dicts, each generated from a
            different metadata file.

    Returns:
        The merged document, as returned by :func:`compact`.
    """
    expanded_nodes = itertools.chain.from_iterable(map(expand, documents))

    merged_document = collections.defaultdict(list)
    # Scalars are tracked separately so that list merging below never has to
    # deal with a non-list entry in ``merged_document``.
    scalar_values = {}

    for node in expanded_nodes:
        for (key, values) in node.items():
            if not isinstance(values, list):
                # Iterating over a string would split it into characters
                # and store those as individual "values"; keep it intact.
                scalar_values.setdefault(key, values)
                continue
            known_values = merged_document[key]
            for value in values:
                if value not in known_values:
                    known_values.append(value)

    merged_document.update(scalar_values)
    return compact(merged_document)