Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_detector.py
# Copyright (C) 2017 The Software Heritage developers | # Copyright (C) 2017 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
mapping_filenames = { | from swh.indexer.metadata_dictionary import MAPPINGS | ||||
b"package.json": "npm", | |||||
b"codemeta.json": "codemeta" | |||||
} | |||||
def detect_metadata(files):
    """
    Detects files potentially containing metadata

    Args:
        files (list): list of file entries; it is handed unchanged to the
            ``detect_metadata_files`` method of every mapping in
            ``MAPPINGS``

    Returns:
        dict: ``{mapping_name: matches}`` for every mapping whose
        ``detect_metadata_files(files)`` returned a truthy value.
        An empty dict (not a list) if nothing was found.
    """
    results = {}
    for mapping_name, mapping in MAPPINGS.items():
        # NOTE(review): `matches` is presumably a list of sha1 identifiers,
        # as produced by the previous filename-based lookup -- confirm
        # against the mapping classes.
        matches = mapping.detect_metadata_files(files)
        # Mappings that found nothing are left out of the result entirely,
        # so callers never see empty entries.
        if matches:
            results[mapping_name] = matches
    return results
def extract_minimal_metadata_dict(metadata_list): | def extract_minimal_metadata_dict(metadata_list): | ||||
""" | """ | ||||
Every item in the metadata_list is a dict of translated_metadata in the | Every item in the metadata_list is a dict of translated_metadata in the | ||||
CodeMeta vocabulary | CodeMeta vocabulary | ||||
we wish to extract a minimal set of terms and keep all values corresponding | we wish to extract a minimal set of terms and keep all values corresponding | ||||
Show All 33 Lines |