Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary.py
# Copyright (C) 2017 The Software Heritage developers | # Copyright (C) 2017 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import csv | |||||
import json | import json | ||||
import os.path | |||||
import swh.indexer | |||||
# Path of the CodeMeta crosswalk CSV, resolved relative to the directory
# that contains the installed ``swh.indexer`` package.
CROSSWALK_TABLE_PATH = os.path.join(
    os.path.dirname(swh.indexer.__file__),
    'data',
    'codemeta',
    'crosswalk.csv',
)
def read_crosstable(fd):
    """
    Parse a CodeMeta crosswalk table in CSV form.

    The first row is the header. Every column other than 'Parent Type',
    'Property', 'Type' and 'Description' is treated as a data source.
    For each following row, the cell of every data-source column is split
    on '/', each piece is stripped of surrounding whitespace, and every
    non-empty piece is mapped to the row's 'Property' value.

    Args:
        fd: an iterable of text lines (e.g. an open text file) accepted
            by ``csv.reader``.

    Returns:
        dict: ``{data_source: {local_name: codemeta_property_name}}``.

    Raises:
        ValueError: if the input has no header row at all.
        AssertionError: if the header has no 'codemeta-V1' column.
        KeyError: if a non-empty row is too short to reach the
            'Property' column.
    """
    reader = csv.reader(fd)
    try:
        header = next(reader)
    except StopIteration:
        # The StopIteration carries no useful information for the caller.
        raise ValueError('empty file') from None

    data_sources = set(header) - {'Parent Type', 'Property',
                                  'Type', 'Description'}
    assert 'codemeta-V1' in data_sources

    codemeta_translation = {data_source: {} for data_source in data_sources}

    for line in reader:
        if not line:
            # csv.reader yields an empty list for a blank line; such a row
            # has no 'Property' cell and nothing to translate.
            continue
        codemeta_name = dict(zip(header, line))['Property']
        for (col, value) in zip(header, line):
            if col not in data_sources:
                continue
            # One cell may list several alternative local names
            # separated by '/'.
            for local_name in value.split('/'):
                local_name = local_name.strip()
                if local_name:
                    codemeta_translation[col][local_name] = codemeta_name

    return codemeta_translation
# Load the crosswalk table once, at import time.
# newline='' lets the csv module do its own newline handling, which it
# needs in order to parse quoted fields that contain embedded newlines.
with open(CROSSWALK_TABLE_PATH, newline='') as fd:
    CROSSWALK_TABLE = read_crosstable(fd)
def convert(raw_content): | def convert(raw_content): | ||||
""" | """ | ||||
convert raw_content recursively: | convert raw_content recursively: | ||||
- from bytes to string | - from bytes to string | ||||
- from string to dict | - from string to dict | ||||
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | def translate(self, content_dict): | ||||
return None | return None | ||||
return translated_metadata | return translated_metadata | ||||
class NpmMapping(BaseMapping):
    """
    Mapping and translation for NPM (package.json) metadata.

    The term mapping is the 'NodeJS' entry of CROSSWALK_TABLE.
    """
    mapping = CROSSWALK_TABLE['NodeJS']

    def translate(self, raw_content):
        """
        Run raw_content through convert() and hand the result to the
        parent class's translate().
        """
        return super().translate(convert(raw_content))
class MavenMapping(BaseMapping):
    """
    Mapping and translation for Maven (pom.xml) metadata.

    The term mapping is the 'Java (Maven)' entry of CROSSWALK_TABLE.
    """
    mapping = CROSSWALK_TABLE['Java (Maven)']

    def translate(self, raw_content):
        """
        Run raw_content through convert() and hand the result to the
        parent class's translate().
        """
        # NOTE(review): the original comment here says the content is
        # parsed from xml to dict; convert() is not fully visible in this
        # view, so confirm it really handles XML input.
        return super().translate(convert(raw_content))
class DoapMapping(BaseMapping): | class DoapMapping(BaseMapping): | ||||
▲ Show 20 Lines • Show All 70 Lines • Show Last 20 Lines |