Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_detector.py
# Copyright (C) 2017 The Software Heritage developers | # Copyright (C) 2017 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from swh.indexer.codemeta import compact, expand | |||||
from swh.indexer.metadata_dictionary import MAPPINGS | from swh.indexer.metadata_dictionary import MAPPINGS | ||||
def detect_metadata(files): | def detect_metadata(files): | ||||
""" | """ | ||||
Detects files potentially containing metadata | Detects files potentially containing metadata | ||||
Args: | Args: | ||||
- files (list): list of files | - files (list): list of files | ||||
Show All 18 Lines | def extract_minimal_metadata_dict(metadata_list): | ||||
to this term without duplication | to this term without duplication | ||||
Args: | Args: | ||||
- metadata_list (list): list of dicts of translated_metadata | - metadata_list (list): list of dicts of translated_metadata | ||||
Returns: | Returns: | ||||
- minimal_dict (dict): one dict with selected values of metadata | - minimal_dict (dict): one dict with selected values of metadata | ||||
""" | """ | ||||
minimal_dict = { | minimal_dict = { | ||||
"developmentStatus": [], | "https://codemeta.github.io/terms/developmentStatus": [], | ||||
"version": [], | "https://codemeta.github.io/terms/version": [], | ||||
"operatingSystem": [], | "https://codemeta.github.io/terms/operatingSystem": [], | ||||
"description": [], | "https://codemeta.github.io/terms/description": [], | ||||
"keywords": [], | "https://codemeta.github.io/terms/keywords": [], | ||||
"issueTracker": [], | "https://codemeta.github.io/terms/issueTracker": [], | ||||
"name": [], | "https://codemeta.github.io/terms/name": [], | ||||
"author": [], | "https://codemeta.github.io/terms/author": [], | ||||
"relatedLink": [], | "https://codemeta.github.io/terms/relatedLink": [], | ||||
"url": [], | "https://codemeta.github.io/terms/url": [], | ||||
"license": [], | "https://codemeta.github.io/terms/license": [], | ||||
"maintainer": [], | "https://codemeta.github.io/terms/maintainer": [], | ||||
"email": [], | "https://codemeta.github.io/terms/email": [], | ||||
"softwareRequirements": [], | "https://codemeta.github.io/terms/identifier": [], | ||||
"identifier": [], | "https://codemeta.github.io/terms/codeRepository": [] | ||||
"codeRepository": [] | |||||
} | } | ||||
for term in minimal_dict.keys(): | for term in list(minimal_dict): | ||||
for metadata_item in metadata_list: | for document in metadata_list: | ||||
for metadata_item in expand(document): | |||||
if term in metadata_item: | if term in metadata_item: | ||||
if not metadata_item[term] in minimal_dict[term]: | if not metadata_item[term] in minimal_dict[term]: | ||||
minimal_dict[term].append(metadata_item[term]) | minimal_dict[term].append(metadata_item[term]) | ||||
if not minimal_dict[term]: | if minimal_dict[term] == []: | ||||
minimal_dict[term] = None | del minimal_dict[term] | ||||
return minimal_dict | return compact(minimal_dict) |