Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/codemeta.py
# Copyright (C) 2018 The Software Heritage developers | # Copyright (C) 2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import csv | import csv | ||||
import json | import json | ||||
import os.path | import os.path | ||||
import re | |||||
import swh.indexer | import swh.indexer | ||||
from pyld import jsonld | from pyld import jsonld | ||||
_DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data') | _DATA_DIR = os.path.join(os.path.dirname(swh.indexer.__file__), 'data') | ||||
CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, 'codemeta', 'crosswalk.csv') | CROSSWALK_TABLE_PATH = os.path.join(_DATA_DIR, 'codemeta', 'crosswalk.csv') | ||||
Show All 12 Lines | PROPERTY_BLACKLIST = { | ||||
# CodeMeta properties that we cannot properly represent. | # CodeMeta properties that we cannot properly represent. | ||||
SCHEMA_URI + 'softwareRequirements', | SCHEMA_URI + 'softwareRequirements', | ||||
CODEMETA_URI + 'softwareSuggestions', | CODEMETA_URI + 'softwareSuggestions', | ||||
# Duplicate of 'author' | # Duplicate of 'author' | ||||
SCHEMA_URI + 'creator', | SCHEMA_URI + 'creator', | ||||
} | } | ||||
_codemeta_field_separator = re.compile(r'\s*[,/]\s*') | |||||
def make_absolute_uri(local_name): | def make_absolute_uri(local_name): | ||||
definition = CODEMETA_CONTEXT['@context'][local_name] | definition = CODEMETA_CONTEXT['@context'][local_name] | ||||
if isinstance(definition, str): | if isinstance(definition, str): | ||||
return definition | return definition | ||||
elif isinstance(definition, dict): | elif isinstance(definition, dict): | ||||
prefixed_name = definition['@id'] | prefixed_name = definition['@id'] | ||||
(prefix, local_name) = prefixed_name.split(':') | (prefix, local_name) = prefixed_name.split(':') | ||||
Show All 26 Lines | for line in reader: # For each canonical name | ||||
if not local_name: | if not local_name: | ||||
continue | continue | ||||
canonical_name = make_absolute_uri(local_name) | canonical_name = make_absolute_uri(local_name) | ||||
if canonical_name in PROPERTY_BLACKLIST: | if canonical_name in PROPERTY_BLACKLIST: | ||||
continue | continue | ||||
for (col, value) in zip(header, line): # For each cell in the row | for (col, value) in zip(header, line): # For each cell in the row | ||||
if col in data_sources: | if col in data_sources: | ||||
# If that's not the parentType/property/type/description | # If that's not the parentType/property/type/description | ||||
for local_name in value.split('/'): | for local_name in _codemeta_field_separator.split(value): | ||||
# For each of the data source's properties that maps | # For each of the data source's properties that maps | ||||
# to this canonical name | # to this canonical name | ||||
if local_name.strip(): | if local_name.strip(): | ||||
codemeta_translation[col][local_name.strip()] = \ | codemeta_translation[col][local_name.strip()] = \ | ||||
canonical_name | canonical_name | ||||
return codemeta_translation | return codemeta_translation | ||||
Show All 33 Lines |