Changeset View
Changeset View
Standalone View
Standalone View
swh/model/identifiers.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import binascii | import binascii | ||||
import datetime | import datetime | ||||
from functools import lru_cache | from functools import lru_cache | ||||
import hashlib | import hashlib | ||||
import re | |||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | ||||
import attr | import attr | ||||
from .collections import ImmutableDict | from .collections import ImmutableDict | ||||
from .exceptions import ValidationError | from .exceptions import ValidationError | ||||
from .fields.hashes import validate_sha1 | from .fields.hashes import validate_sha1 | ||||
from .hashutil import MultiHash, hash_git_data, hash_to_hex | from .hashutil import MultiHash, hash_git_data, hash_to_hex | ||||
▲ Show 20 Lines • Show All 800 Lines • ▼ Show 20 Lines | Raises: | ||||
* invalid type supplied | * invalid type supplied | ||||
* missing hash | * missing hash | ||||
* invalid hash identifier supplied | * invalid hash identifier supplied | ||||
Returns: | Returns: | ||||
a named tuple holding the parsing result | a named tuple holding the parsing result | ||||
""" | """ | ||||
if re.search(r"[ \t\n\r\f\v]", swhid): | |||||
raise ValidationError("Invalid SwHID: SWHIDs cannot contain whitespaces") | |||||
# <swhid>;<contextual-information> | # <swhid>;<contextual-information> | ||||
swhid_parts = swhid.split(SWHID_CTXT_SEP) | swhid_parts = swhid.split(SWHID_CTXT_SEP) | ||||
swhid_data = swhid_parts.pop(0).split(":") | swhid_data = swhid_parts.pop(0).split(":") | ||||
if len(swhid_data) != 4: | if len(swhid_data) != 4: | ||||
raise ValidationError( | raise ValidationError( | ||||
"Invalid SWHID, format must be 'swh:1:OBJECT_TYPE:OBJECT_ID'" | "Invalid SWHID, format must be 'swh:1:OBJECT_TYPE:OBJECT_ID'" | ||||
) | ) | ||||
Show All 14 Lines | def parse_swhid(swhid: str) -> SWHID: | ||||
_metadata = {} | _metadata = {} | ||||
for part in swhid_parts: | for part in swhid_parts: | ||||
try: | try: | ||||
qualifier, val = part.split("=") | qualifier, val = part.split("=") | ||||
_metadata[qualifier] = val | _metadata[qualifier] = val | ||||
except Exception: | except Exception: | ||||
raise ValidationError( | raise ValidationError( | ||||
"Invalid SWHID: contextual data must be a ;-separated list of " | "Invalid SWHID: contextual data must be a ;-separated list of " | ||||
" key=value pairs" | " key=value pairs" | ||||
ardumont: I'm not so keen on that error message.
Feel free to propose something better here. | |||||
Done Inline Actions — At the beginning of the function, add: `if re.search(r"\s", swhid): raise ValidationError("Invalid SWHID: SWHIDs can not contain whitespaces.")` vlorentz: At the beginning of the function: if `re.search(r"\s", swhid)`: raise ValidationError("Invalid… | |||||
Done Inline Actions — Oh, that'd be nice, but is that true? ardumont: oh that'd be nice, but is that true?
| |||||
Done Inline Actions — I re-read the grammar and it's true all right ;) Thanks. ardumont: I read back the grammar and it's true alright ;)
thanks.
| |||||
Not Done Inline Actions — Actually, no: a non-breaking space (nbsp) would be matched by `\s`, but it is allowed in IRIs via `ucschar`. So check for `[ \t\n\r\f\v]` instead. vlorentz: uuuuh, actually no. nbsp would be matched by `\s` but is allowed in IRIs via `ucschar`. So… | |||||
) | ) | ||||
wrong_qualifiers = set(_metadata) - set(CONTEXT_QUALIFIERS) | wrong_qualifiers = set(_metadata) - set(CONTEXT_QUALIFIERS) | ||||
if wrong_qualifiers: | if wrong_qualifiers: | ||||
error_msg = ( | error_msg = ( | ||||
f"Invalid SWHID: Wrong qualifiers {', '.join(wrong_qualifiers)}. " | f"Invalid SWHID: Wrong qualifiers {', '.join(wrong_qualifiers)}. " | ||||
f"The qualifiers must be one of {', '.join(CONTEXT_QUALIFIERS)}" | f"The qualifiers must be one of {', '.join(CONTEXT_QUALIFIERS)}" | ||||
) | ) | ||||
Show All 9 Lines |
I'm not so keen on that error message.
Feel free to propose something better here.