Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/python.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import email.parser | import email.parser | ||||
import email.policy | import email.policy | ||||
import itertools | import itertools | ||||
from typing import Any, Dict, List | from typing import Any, Dict, List, Optional, Tuple, Union | ||||
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI | from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI | ||||
from .base import DictMapping, SingleFileMapping | from .base import DictMapping, SingleFileMapping | ||||
_normalize_pkginfo_key = str.lower | _normalize_pkginfo_key = str.lower | ||||
Show All 22 Lines | string_fields = [ | ||||
"description", | "description", | ||||
"summary", | "summary", | ||||
"author", | "author", | ||||
"author-email", | "author-email", | ||||
] | ] | ||||
_parser = email.parser.BytesHeaderParser(policy=LinebreakPreservingEmailPolicy()) | _parser = email.parser.BytesHeaderParser(policy=LinebreakPreservingEmailPolicy()) | ||||
def translate(self, content: bytes) -> Dict[str, Any]: | def translate( | ||||
self, content: bytes | |||||
) -> Optional[Dict[str, Union[str, List[Any], Dict[str, Any], Tuple[str]]]]: | |||||
msg = self._parser.parsebytes(content) | msg = self._parser.parsebytes(content) | ||||
d: Dict[str, List[str]] = {} | d: Dict[str, List[str]] = {} | ||||
for (key, value) in msg.items(): | for (key, value) in msg.items(): | ||||
key = _normalize_pkginfo_key(key) | key = _normalize_pkginfo_key(key) | ||||
if value != "UNKNOWN": | if value != "UNKNOWN": | ||||
d.setdefault(key, []).append(value) | d.setdefault(key, []).append(value) | ||||
metadata = self._translate_dict(d, normalize=False) | metadata = self._translate_dict(d, normalize=False) | ||||
if SCHEMA_URI + "author" in metadata or SCHEMA_URI + "email" in metadata: | if SCHEMA_URI + "author" in metadata or SCHEMA_URI + "email" in metadata: | ||||
Show All 21 Lines |